diff options
Diffstat (limited to 'vendor/ruby-ole')
-rw-r--r-- | vendor/ruby-ole/ChangeLog | 62 | ||||
-rw-r--r-- | vendor/ruby-ole/README | 115 | ||||
-rw-r--r-- | vendor/ruby-ole/Rakefile | 209 | ||||
-rwxr-xr-x | vendor/ruby-ole/bin/oletool | 41 | ||||
-rw-r--r-- | vendor/ruby-ole/data/propids.yaml | 56 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/base.rb | 7 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/file_system.rb | 2 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/ranges_io.rb | 231 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/storage.rb | 3 | ||||
-rwxr-xr-x | vendor/ruby-ole/lib/ole/storage/base.rb | 916 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/storage/file_system.rb | 423 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/storage/meta_data.rb | 148 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/support.rb | 256 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/types.rb | 2 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/types/base.rb | 251 | ||||
-rw-r--r-- | vendor/ruby-ole/lib/ole/types/property_set.rb | 165 |
16 files changed, 0 insertions, 2887 deletions
diff --git a/vendor/ruby-ole/ChangeLog b/vendor/ruby-ole/ChangeLog deleted file mode 100644 index 1e7c80b59..000000000 --- a/vendor/ruby-ole/ChangeLog +++ /dev/null @@ -1,62 +0,0 @@ -== 1.2.8.2 / 2009-01-01 - -- Update code to support ruby 1.9.1 - -== 1.2.8.1 / 2008-10-22 - -- Fix a couple of breakages when using $KCODE = 'UTF8' - -== 1.2.8 / 2008-10-08 - -- Check in the new fixes to the mbat support. -- Update README to be a bit more useful. - -== 1.2.7 / 2008-08-12 - -- Prepare Ole::Types::PropertySet for write support. -- Introduce Ole::Storage#meta_data as an easy interface to meta data stored - within various property sets. -- Add new --metadata action to oletool to dump said metadata. -- Add new --mimetype action to oletool, and corresponding Ole::Storage#mime_type - function to try to guess mime type of a file based on some simple heuristics. -- Restructure project files a bit, and pull in file_system & meta_data support - by default. -- More tests - now have 100% coverage. - -== 1.2.6 / 2008-07-21 - -- Fix FileClass#expand_path to work properly on darwin (issue #2) -- Guard against Enumerable#sum clash with active support (issue #3) - -== 1.2.5 / 2008-02-16 - -- Make all tests pass on ruby 1.9. - -== 1.2.4 / 2008-01-09 - -- Make all tests pass on windows (issue #1). -- Make all tests pass on a power pc (running ubuntu). -- Property set convenience access functions. - -== 1.2.3 / 2007-12-28 - -- MBAT write support re-implmented. Can now write files over ~8mb again. -- Minor fixes (truncation in #flush, file modification timestamps) -- More test coverage -- Initial (read-only) property set support. -- Complete filesystem api, to pass most of the rubyzip tests. -- Add a ChangeLog :). - -== 1.2.2 / 2007-11-05 - -- Lots of test updates, 90% coverage. -- Fix +to_tree+ method to be more efficient, and stream output. -- Optimizations from benchmarks and profiling, mostly for writes. 
Fixed - AllocationTable#resize_chain, RangesIOResizable#truncate and - AllocationTable#free_block. -- Add in filesystem test file from rubyzip, and start working on a - filesystem api. - -== 1.2.1 / 2007-08-20 - -- Separate out from ruby-msg as new project. diff --git a/vendor/ruby-ole/README b/vendor/ruby-ole/README deleted file mode 100644 index 0208c5abd..000000000 --- a/vendor/ruby-ole/README +++ /dev/null @@ -1,115 +0,0 @@ -= Introduction - -The ruby-ole library provides a variety of functions primarily for -working with OLE2 structured storage files, such as those produced by -Microsoft Office - eg *.doc, *.msg etc. - -= Example Usage - -Here are some examples of how to use the library functionality, -categorised roughly by purpose. - -1. Reading and writing files within an OLE container - - The recommended way to manipulate the contents is via the - "file_system" API, whereby you use Ole::Storage instance methods - similar to the regular File and Dir class methods. - - ole = Ole::Storage.open('oleWithDirs.ole', 'rb+') - p ole.dir.entries('.') # => [".", "..", "dir1", "dir2", "file1"] - p ole.file.read('file1')[0, 25] # => "this is the entry 'file1'" - ole.dir.mkdir('newdir') - -2. Accessing OLE meta data - - Some convenience functions are provided for (currently read only) - access to OLE property sets and other sources of meta data. - - ole = Ole::Storage.open('test_word_95.doc') - p ole.meta_data.file_format # => "MSWordDoc" - p ole.meta_data.mime_type # => "application/msword" - p ole.meta_data.doc_author.split.first # => "Charles" - -3. Raw access to underlying OLE internals - - This is probably of little interest to most developers using the - library, but for some use cases you may need to drop down to the - lower level API on which the "file_system" API is constructed, - which exposes more of the format details. 
- - <tt>Ole::Storage</tt> files can have multiple files with the same name, - or with a slash in the name, and other things that are probably - strictly invalid. This API is the only way to access those files. - - You can access the header object directly: - - p ole.header.num_sbat # => 1 - p ole.header.magic.unpack('H*') # => ["d0cf11e0a1b11ae1"] - - You can directly access the array of all Dirent objects, - including the root: - - p ole.dirents.length # => 5 - puts ole.root.to_tree - # => - - #<Dirent:"Root Entry"> - |- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000..."> - |- #<Dirent:"\001CompObj" size=98 data="\001\000\376\377\003..."> - |- #<Dirent:"WordDocument" size=2574 data="\334\245e\000-..."> - \- #<Dirent:"\005SummaryInformation" size=54788 data="\376\377\000\000\001..."> - - You can access (through RangesIO methods, or by using the - relevant Dirent and AllocationTable methods) information like where within - the container a stream is located (these are offset/length pairs): - - p ole.root["\001CompObj"].open { |io| io.ranges } # => [[0, 64], [64, 34]] - -See the documentation for each class for more details. - -= Thanks - -* The code contained in this project was initially based on chicago's libole - (source available at http://prdownloads.sf.net/chicago/ole.tgz). - -* It was later augmented with some corrections by inspecting pole, and (purely - for header definitions) gsf. - -* The property set parsing code came from the apache java project POIFS. - -* The excellent idea for using a pseudo file system style interface by providing - #file and #dir methods which mimic File and Dir, was borrowed (along with almost - unchanged tests!) from Thomas Sondergaard's rubyzip. - -= TODO - -== 1.2.9 - -* add buffering to rangesio so that performance for small reads and writes - isn't so awful. maybe try and remove the bottlenecks of unbuffered first - with more profiling, then implement the buffering on top of that. 
-* fix mode strings - like truncate when using 'w+', supporting append - 'a+' modes etc. done? -* make ranges io obey readable vs writeable modes. -* more RangesIO completion. ie, doesn't support #<< at the moment. -* maybe some oletool doc. -* make sure `rake test' runs tests both with $KCODE='UTF8', and without, - and maybe ensure i don't regress on 1.9 and jruby either now that they're - fixed. - -== 1.3.1 - -* fix property sets a bit more. see TODO in Ole::Storage::MetaData -* ability to zero out padding and unused blocks -* case insensitive mode for ole/file_system? -* better tests for mbat support. -* further doc cleanup -* add in place testing for jruby and ruby1.9 - -== Longer term - -* more benchmarking, profiling, and speed fixes. was thinking vs other - ruby filesystems (eg, vs File/Dir itself, and vs rubyzip), and vs other - ole implementations (maybe perl's, and poifs) just to check its in the - ballpark, with no remaining silly bottlenecks. -* supposedly vba does something weird to ole files. test that. 
- diff --git a/vendor/ruby-ole/Rakefile b/vendor/ruby-ole/Rakefile deleted file mode 100644 index 1153bb39a..000000000 --- a/vendor/ruby-ole/Rakefile +++ /dev/null @@ -1,209 +0,0 @@ -require 'rake/rdoctask' -require 'rake/testtask' -require 'rake/packagetask' -require 'rake/gempackagetask' - -require 'rbconfig' -require 'fileutils' - -$:.unshift 'lib' - -require 'ole/storage' - -PKG_NAME = 'ruby-ole' -PKG_VERSION = Ole::Storage::VERSION - -task :default => [:test] - -Rake::TestTask.new do |t| - t.test_files = FileList["test/test_*.rb"] - t.warning = true - t.verbose = true -end - -begin - require 'rcov/rcovtask' - # NOTE: this will not do anything until you add some tests - desc "Create a cross-referenced code coverage report" - Rcov::RcovTask.new do |t| - t.test_files = FileList['test/test*.rb'] - t.ruby_opts << "-Ilib" # in order to use this rcov - t.rcov_opts << "--xrefs" # comment to disable cross-references - t.verbose = true - end -rescue LoadError - # Rcov not available -end - -Rake::RDocTask.new do |t| - t.rdoc_dir = 'doc' - t.rdoc_files.include 'lib/**/*.rb' - t.rdoc_files.include 'README', 'ChangeLog' - t.title = "#{PKG_NAME} documentation" - t.options += %w[--line-numbers --inline-source --tab-width 2] - t.main = 'README' -end - -spec = Gem::Specification.new do |s| - s.name = PKG_NAME - s.version = PKG_VERSION - s.summary = %q{Ruby OLE library.} - s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.} - s.authors = ['Charles Lowe'] - s.email = %q{aquasync@gmail.com} - s.homepage = %q{http://code.google.com/p/ruby-ole} - s.rubyforge_project = %q{ruby-ole} - - s.executables = ['oletool'] - s.files = ['README', 'Rakefile', 'ChangeLog', 'data/propids.yaml'] - s.files += FileList['lib/**/*.rb'] - s.files += FileList['test/test_*.rb', 'test/*.doc'] - s.files += FileList['test/oleWithDirs.ole', 'test/test_SummaryInformation'] - s.files += FileList['bin/*'] - s.test_files = FileList['test/test_*.rb'] - - s.has_rdoc = true - 
s.extra_rdoc_files = ['README', 'ChangeLog'] - s.rdoc_options += [ - '--main', 'README', - '--title', "#{PKG_NAME} documentation", - '--tab-width', '2' - ] -end - -Rake::GemPackageTask.new(spec) do |t| - t.gem_spec = spec - t.need_tar = true - t.need_zip = false - t.package_dir = 'build' -end - -desc 'Run various benchmarks' -task :benchmark do - require 'benchmark' - require 'tempfile' - require 'ole/file_system' - - # should probably add some read benchmarks too - def write_benchmark opts={} - files, size = opts[:files], opts[:size] - block_size = opts[:block_size] || 100_000 - block = 0.chr * block_size - blocks, remaining = size.divmod block_size - remaining = 0.chr * remaining - Tempfile.open 'ole_storage_benchmark' do |temp| - Ole::Storage.open temp do |ole| - files.times do |i| - ole.file.open "file_#{i}", 'w' do |f| - blocks.times { f.write block } - f.write remaining - end - end - end - end - end - - Benchmark.bm do |bm| - bm.report 'write_1mb_1x5' do - 5.times { write_benchmark :files => 1, :size => 1_000_000 } - end - - bm.report 'write_1mb_2x5' do - 5.times { write_benchmark :files => 1_000, :size => 1_000 } - end - end -end - -=begin - -1.2.1: - - user system total real -write_1mb_1x5 73.920000 8.400000 82.320000 ( 91.893138) - -revision 17 (speed up AllocationTable#free_block by using -@sparse attribute, and using Array#index otherwise): - - user system total real -write_1mb_1x5 57.910000 6.190000 64.100000 ( 66.207993) -write_1mb_2x5266.310000 31.750000 298.060000 (305.877203) - -add in extra resize_chain fix (return blocks to avoid calling -AllocationTable#chain twice): - - user system total real -write_1mb_1x5 43.140000 5.480000 48.620000 ( 51.835942) - -add in RangesIOResizeable fix (cache @blocks, to avoid calling -AllocationTable#chain at all when resizing now, just pass it -to AllocationTable#resize_chain): - - user system total real -write_1mb_1x5 29.770000 5.180000 34.950000 ( 39.916747) - -40 seconds is still a really long time to write out 
5 megs. -of course, this is all with a 1_000 byte block size, which is -a very small wite. upping this to 100_000 bytes: - - user system total real -write_1mb_1x5 0.540000 0.130000 0.670000 ( 1.051862) - -so it seems that that makes a massive difference. so i really -need buffering in RangesIO if I don't want it to really hurt -for small writes, as all the resize code is kind of expensive. - -one of the costly things at the moment, is RangesIO#offset_and_size, -which is called for each write, and re-finds which range we are in. -that should obviously be changed, to a fixed one that is invalidated -on seeks. buffering would hide that problem to some extent, but i -should fix it anyway. - -re-running the original 1.2.1 with 100_000 byte block size: - - user system total real -write_1mb_1x5 15.590000 2.230000 17.820000 ( 18.704910) - -so there the really badly non-linear AllocationTable#resize_chain is -being felt. - -back to current working copy, running full benchmark: - - user system total real -write_1mb_1x5 0.530000 0.150000 0.680000 ( 0.708919) -write_1mb_2x5227.940000 31.260000 259.200000 (270.200960) - -not surprisingly, the second case hasn't been helped much by the fixes -so far, as they only really help multiple resizes and writes for a file. -this could be pain in the new file system code - potentially searching -through Dirent#children at creation time. - -to test, i'll profile creating 1_000 files, without writing anything: - - user system total real -write_1mb_2x5 16.990000 1.830000 18.820000 ( 19.900568) - -hmmm, so thats not all of it. maybe its the initial chain calls, etc? -writing 1 byte: - - user system total real -write_1mb_1x5 0.520000 0.120000 0.640000 ( 0.660638) -write_1mb_2x5 19.810000 2.280000 22.090000 ( 22.696214) - -weird. 
- -100 bytes: - - user system total real -write_1mb_1x5 0.560000 0.140000 0.700000 ( 1.424974) -write_1mb_2x5 22.940000 2.840000 25.780000 ( 26.556346) - -500 bytes: - - user system total real -write_1mb_1x5 0.530000 0.150000 0.680000 ( 1.139738) -write_1mb_2x5 77.260000 10.130000 87.390000 ( 91.671086) - -what happens there? very strange. - -=end - diff --git a/vendor/ruby-ole/bin/oletool b/vendor/ruby-ole/bin/oletool deleted file mode 100755 index d81afab5a..000000000 --- a/vendor/ruby-ole/bin/oletool +++ /dev/null @@ -1,41 +0,0 @@ -#! /usr/bin/ruby - -require 'optparse' -require 'rubygems' -require 'ole/storage' - -def oletool - opts = {:verbose => false, :action => :tree} - op = OptionParser.new do |op| - op.banner = "Usage: oletool [options] [files]" - op.separator '' - op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree } - op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack } - op.on('-m', '--mimetype', 'Print the guessed mime types') { opts[:action] = :mimetype } - op.on('-y', '--metadata', 'Dump the internal meta data as YAML') { opts[:action] = :metadata } - op.separator '' - op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v } - op.on_tail('-h', '--help', 'Show this message') { puts op; exit } - end - files = op.parse ARGV - if files.empty? - puts 'Must specify 1 or more msg files.' - puts op - exit 1 - end - Ole::Log.level = opts[:verbose] ? 
Logger::WARN : Logger::FATAL - files.each do |file| - case opts[:action] - when :tree - Ole::Storage.open(file) { |ole| puts ole.root.to_tree } - when :repack - Ole::Storage.open file, 'rb+', &:repack - when :metadata - Ole::Storage.open(file) { |ole| y ole.meta_data.to_h } - when :mimetype - puts Ole::Storage.open(file) { |ole| ole.meta_data.mime_type } - end - end -end - -oletool diff --git a/vendor/ruby-ole/data/propids.yaml b/vendor/ruby-ole/data/propids.yaml deleted file mode 100644 index 9ac43ffe1..000000000 --- a/vendor/ruby-ole/data/propids.yaml +++ /dev/null @@ -1,56 +0,0 @@ -"{f29f85e0-4ff9-1068-ab91-08002b27b3d9}": - - FMTID_SummaryInformation - - 2: doc_title - 3: doc_subject - 4: doc_author - 5: doc_keywords - 6: doc_comments - 7: doc_template - 8: doc_last_author - 9: doc_rev_number - 10: doc_edit_time - 11: doc_last_printed - 12: doc_created_time - 13: doc_last_saved_time - 14: doc_page_count - 15: doc_word_count - 16: doc_char_count - 18: doc_app_name - 19: security - -"{d5cdd502-2e9c-101b-9397-08002b2cf9ae}": - - FMTID_DocSummaryInfo - - 2: doc_category - 3: doc_presentation_target - 4: doc_byte_count - 5: doc_line_count - 6: doc_para_count - 7: doc_slide_count - 8: doc_note_count - 9: doc_hidden_count - 10: mmclips - 11: scale_crop - 12: heading_pairs - 13: doc_part_titles - 14: doc_manager - 15: doc_company - 16: links_up_to_date - -"{d5cdd505-2e9c-101b-9397-08002b2cf9ae}": - - FMTID_UserDefinedProperties - - {} - -# just dumped these all here. if i can confirm any of these -# better, i can update this file so they're recognized. -#0b63e343-9ccc-11d0-bcdb-00805fccce04 -#0b63e350-9ccc-11d0-bcdb-00805fccce04 NetLibrary propset? -#31f400a0-fd07-11cf-b9bd-00aa003db18e ScriptInfo propset? -#49691c90-7e17-101a-a91c-08002b2ecda9 Query propset? -#560c36c0-503a-11cf-baa1-00004c752a9a -#70eb7a10-55d9-11cf-b75b-00aa0051fe20 HTMLInfo propset -#85ac0960-1819-11d1-896f-00805f053bab message propset? 
-#aa568eec-e0e5-11cf-8fda-00aa00a14f93 NNTP SummaryInformation propset? -#b725f130-47ef-101a-a5f1-02608c9eebac Storage propset -#c82bf596-b831-11d0-b733-00aa00a1ebd2 NetLibraryInfo propset -#c82bf597-b831-11d0-b733-00aa00a1ebd2 LinkInformation propset? -#d1b5d3f0-c0b3-11cf-9a92-00a0c908dbf1 LinkInformation propset? diff --git a/vendor/ruby-ole/lib/ole/base.rb b/vendor/ruby-ole/lib/ole/base.rb deleted file mode 100644 index ee1bc0431..000000000 --- a/vendor/ruby-ole/lib/ole/base.rb +++ /dev/null @@ -1,7 +0,0 @@ - -require 'ole/support' - -module Ole # :nodoc: - Log = Logger.new_with_callstack -end - diff --git a/vendor/ruby-ole/lib/ole/file_system.rb b/vendor/ruby-ole/lib/ole/file_system.rb deleted file mode 100644 index 24d330a92..000000000 --- a/vendor/ruby-ole/lib/ole/file_system.rb +++ /dev/null @@ -1,2 +0,0 @@ -# keeping this file around for now, but will delete later on... -require 'ole/storage/file_system' diff --git a/vendor/ruby-ole/lib/ole/ranges_io.rb b/vendor/ruby-ole/lib/ole/ranges_io.rb deleted file mode 100644 index bfca4fe09..000000000 --- a/vendor/ruby-ole/lib/ole/ranges_io.rb +++ /dev/null @@ -1,231 +0,0 @@ -# need IO::Mode -require 'ole/support' - -# -# = Introduction -# -# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder -# slices of the input file by providing a list of ranges. Intended as an initial measure to curb -# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with -# no method to stream it. -# -# This class will encapuslate the ranges (corresponding to big or small blocks) of any ole file -# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just -# getting 16 bytes doesn't read the whole thing). -# -# In the simplest case it can be used with a single range to provide a limited io to a section of -# a file. -# -# = Limitations -# -# * No buffering. by design at the moment. 
Intended for large reads -# -# = TODO -# -# On further reflection, this class is something of a joining/optimization of -# two separate IO classes. a SubfileIO, for providing access to a range within -# a File as a separate IO object, and a ConcatIO, allowing the presentation of -# a bunch of io objects as a single unified whole. -# -# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will -# convert a whole mime message into an IO stream, that can be read from. -# It will just be the concatenation of a series of IO objects, corresponding to -# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the -# original message proper, or RangesIO as provided by the Attachment#data, that -# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the- -# fly. Thus the attachment, in its plain or encoded form, and the message as a -# whole never exists as a single string in memory, as it does now. This is a -# fair bit of work to achieve, but generally useful I believe. -# -# This class isn't ole specific, maybe move it to my general ruby stream project. -# -class RangesIO - attr_reader :io, :mode, :ranges, :size, :pos - # +io+:: the parent io object that we are wrapping. - # +mode+:: the mode to use - # +params+:: hash of params. - # * :ranges - byte offsets, either: - # 1. an array of ranges [1..2, 4..5, 6..8] or - # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above - # (think the way String indexing works) - # * :close_parent - boolean to close parent when this object is closed - # - # NOTE: the +ranges+ can overlap. - def initialize io, mode='r', params={} - mode, params = 'r', mode if Hash === mode - ranges = params[:ranges] - @params = {:close_parent => false}.merge params - @mode = IO::Mode.new mode - @io = io - # convert ranges to arrays. check for negative ranges? - ranges ||= [0, io.size] - @ranges = ranges.map { |r| Range === r ? 
[r.begin, r.end - r.begin] : r } - # calculate size - @size = @ranges.inject(0) { |total, (pos, len)| total + len } - # initial position in the file - @pos = 0 - - # handle some mode flags - truncate 0 if @mode.truncate? - seek size if @mode.append? - end - -#IOError: closed stream -# get this for reading, writing, everything... -#IOError: not opened for writing - - # add block form. TODO add test for this - def self.open(*args, &block) - ranges_io = new(*args) - if block_given? - begin; yield ranges_io - ensure; ranges_io.close - end - else - ranges_io - end - end - - def pos= pos, whence=IO::SEEK_SET - case whence - when IO::SEEK_SET - when IO::SEEK_CUR - pos += @pos - when IO::SEEK_END - pos = @size + pos - else raise Errno::EINVAL - end - raise Errno::EINVAL unless (0...@size) === pos - @pos = pos - end - - alias seek :pos= - alias tell :pos - - def close - @io.close if @params[:close_parent] - end - - # returns the [+offset+, +size+], pair inorder to read/write at +pos+ - # (like a partial range), and its index. - def offset_and_size pos - total = 0 - ranges.each_with_index do |(offset, size), i| - if pos <= total + size - diff = pos - total - return [offset + diff, size - diff], i - end - total += size - end - # should be impossible for any valid pos, (0...size) === pos - raise ArgumentError, "no range for pos #{pos.inspect}" - end - - def eof? - @pos == @size - end - - # read bytes from file, to a maximum of +limit+, or all available if unspecified. - def read limit=nil - data = '' - return data if eof? - limit ||= size - partial_range, i = offset_and_size @pos - # this may be conceptually nice (create sub-range starting where we are), but - # for a large range array its pretty wasteful. even the previous way was. but - # i'm not trying to optimize this atm. it may even go to c later if necessary. - ([partial_range] + ranges[i+1..-1]).each do |pos, len| - @io.seek pos - if limit < len - # convoluted, to handle read errors. 
s may be nil - s = @io.read limit - @pos += s.length if s - break data << s - end - # convoluted, to handle ranges beyond the size of the file - s = @io.read len - @pos += s.length if s - data << s - break if s.length != len - limit -= len - end - data - end - - # you may override this call to update @ranges and @size, if applicable. - def truncate size - raise NotImplementedError, 'truncate not supported' - end - - # using explicit forward instead of an alias now for overriding. - # should override truncate. - def size= size - truncate size - end - - def write data - # short cut. needed because truncate 0 may return no ranges, instead of empty range, - # thus offset_and_size fails. - return 0 if data.empty? - data_pos = 0 - # if we don't have room, we can use the truncate hook to make more space. - if data.length > @size - @pos - begin - truncate @pos + data.length - rescue NotImplementedError - raise IOError, "unable to grow #{inspect} to write #{data.length} bytes" - end - end - partial_range, i = offset_and_size @pos - ([partial_range] + ranges[i+1..-1]).each do |pos, len| - @io.seek pos - if data_pos + len > data.length - chunk = data[data_pos..-1] - @io.write chunk - @pos += chunk.length - data_pos = data.length - break - end - @io.write data[data_pos, len] - @pos += len - data_pos += len - end - data_pos - end - - alias << write - - # i can wrap it in a buffered io stream that - # provides gets, and appropriately handle pos, - # truncate. mostly added just to past the tests. - # FIXME - def gets - s = read 1024 - i = s.index "\n" - @pos -= s.length - (i+1) - s[0..i] - end - alias readline :gets - - def inspect - # the rescue is for empty files - pos, len = (@ranges[offset_and_size(@pos).last] rescue [nil, nil]) - range_str = pos ? "#{pos}..#{pos+len}" : 'nil' - "#<#{self.class} io=#{io.inspect}, size=#@size, pos=#@pos, "\ - "range=#{range_str}>" - end -end - -# this subclass of ranges io explicitly ignores the truncate part of 'w' modes. 
-# only really needed for the allocation table writes etc. maybe just use explicit modes -# for those -# better yet write a test that breaks before I fix it. added nodoc for the -# time being. -class RangesIONonResizeable < RangesIO # :nodoc: - def initialize io, mode='r', params={} - mode, params = 'r', mode if Hash === mode - flags = IO::Mode.new(mode).flags & ~IO::TRUNC - super io, flags, params - end -end - diff --git a/vendor/ruby-ole/lib/ole/storage.rb b/vendor/ruby-ole/lib/ole/storage.rb deleted file mode 100644 index 02e851df7..000000000 --- a/vendor/ruby-ole/lib/ole/storage.rb +++ /dev/null @@ -1,3 +0,0 @@ -require 'ole/storage/base' -require 'ole/storage/file_system' -require 'ole/storage/meta_data' diff --git a/vendor/ruby-ole/lib/ole/storage/base.rb b/vendor/ruby-ole/lib/ole/storage/base.rb deleted file mode 100755 index 3c41b21a2..000000000 --- a/vendor/ruby-ole/lib/ole/storage/base.rb +++ /dev/null @@ -1,916 +0,0 @@ -require 'tempfile' - -require 'ole/base' -require 'ole/types' -require 'ole/ranges_io' - -module Ole # :nodoc: - # - # This class is the primary way the user interacts with an OLE storage file. - # - # = TODO - # - # * the custom header cruft for Header and Dirent needs some love. - # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent, - # and, in a manner of speaking, but arguably different, Storage itself. - # they have differing api's which would be nice to rethink. - # AllocationTable::Big must be created aot now, as it is used for all subsequent reads. - # - class Storage - # thrown for any bogus OLE file errors. - class FormatError < StandardError # :nodoc: - end - - VERSION = '1.2.8.2' - - # options used at creation time - attr_reader :params - # The top of the ole tree structure - attr_reader :root - # The tree structure in its original flattened form. only valid after #load, or #flush. 
- attr_reader :dirents - # The underlying io object to/from which the ole object is serialized, whether we - # should close it, and whether it is writeable - attr_reader :io, :close_parent, :writeable - # Low level internals, you probably shouldn't need to mess with these - attr_reader :header, :bbat, :sbat, :sb_file - - # +arg+ should be either a filename, or an +IO+ object, and needs to be seekable. - # +mode+ is optional, and should be a regular mode string. - def initialize arg, mode=nil, params={} - params, mode = mode, nil if Hash === mode - params = {:update_timestamps => true}.merge(params) - @params = params - - # get the io object - @close_parent, @io = if String === arg - mode ||= 'rb' - [true, open(arg, mode)] - else - raise ArgumentError, 'unable to specify mode string with io object' if mode - [false, arg] - end - # do we have this file opened for writing? don't know of a better way to tell - # (unless we parse the mode string in the open case) - # hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more - # reason to use mode string parsing when available, and fall back to something like - # io.writeable? otherwise. - @writeable = begin - if mode - IO::Mode.new(mode).writeable? - else - @io.flush - # this is for the benefit of ruby-1.9 - @io.syswrite('') if @io.respond_to?(:syswrite) - true - end - rescue IOError - false - end - # silence undefined warning in clear - @sb_file = nil - # if the io object has data, we should load it, otherwise start afresh - # this should be based on the mode string rather. - @io.size > 0 ? load : clear - end - - # somewhat similar to File.open, the open class method allows a block form where - # the Ole::Storage object is automatically closed on completion of the block. - def self.open arg, mode=nil, params={} - ole = new arg, mode, params - if block_given? - begin yield ole - ensure; ole.close - end - else ole - end - end - - # load document from file. 
#
# TODO: implement various allocation table checks, maybe as an AllocationTable#fsck function :)
#
# 1. reterminate any chain not ending in EOC.
#    compare file size with actually allocated blocks per file.
# 2. pass through all chain heads looking for collisions, and making sure nothing points to them
#    (ie they are really heads). in both sbat and mbat
# 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
#    in the bat for them.
# 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
#    (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
#    will be automatically thrown away at close time.
#
# Reads the header, the big block allocation table, the directory entries and the
# small block table from @io, and rebuilds the dirent tree under @root.
# Raises FormatError if the dirent links are inconsistent.
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.new header_block

  # create an empty bbat. the start of its chain lives in the header block after the
  # header struct; the remainder is in the mbat blocks, each of which ends with the
  # index of the next mbat block.
  @bbat = AllocationTable::Big.new self
  bbat_chain = header_block[Header::SIZE..-1].unpack 'V*'
  mbat_block = @header.mbat_start
  @header.num_mbat.times do
    blocks = @bbat.read([mbat_block]).unpack 'V*'
    mbat_block = blocks.pop
    bbat_chain += blocks
  end
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])

  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. semantics changed - used to cut at first dir where dir.type == 0
  # NOTE(review): rejecting empty entries shifts array positions, while to_tree below
  # looks entries up by position - confirm this cannot misalign prev/next/child links.
  @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE).
    map { |str| Dirent.new self, str }.reject { |d| d.type_id == 0 }

  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  class << @dirents
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      raise FormatError, "directory index #{idx} out of range" unless d
      # mark the entry as visited *before* descending, so that a link cycle is
      # reported as a FormatError rather than recursing until the stack overflows.
      raise FormatError, "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      d.children = to_tree d.child
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end

  @root = @dirents.to_tree.first
  Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  unused = @dirents.reject(&:idx).length
  Log.warn "#{unused} unused directories" if unused > 0

  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  # which mode to use here?
  @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end

# Closes the small block file, writes the meta data out if the storage is writeable,
# and closes the underlying io if @close_parent is set.
def close
  @sb_file.close
  flush if @writeable
  @io.close if @close_parent
end

# the flush method is the main "save" method. all file contents are always
# written directly to the file by the RangesIO objects, all this method does
# is write out all the file meta data - dirents, allocation tables, file header
# etc.
#
# maybe add an option to zero the padding, and any remaining avail blocks in the
# allocation table.
#
# TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
# of bbat to AllocationTable::Big.
def flush
  # update root dirent, and flatten dirent tree
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten

  # serialize the dirents using the bbat, padded with zero bytes to a whole block
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
    @dirents.each { |dirent| io.write dirent.to_s }
    padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
    io.write 0.chr * padding
    @header.dirent_start = io.first_block
  end

  # serialize the sbat
  # perhaps the blocks used by the sbat should be marked with BAT?
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
    io.write @sbat.to_s
    @header.sbat_start = io.first_block
    @header.num_sbat = @bbat.chain(@header.sbat_start).length
  end

  # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
  # truncate. then when its time to write, convert that chain and some chunk of blocks at
  # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
  # done.
  # this is perhaps not good, as we reclaim all bat blocks here, which
  # may include the sbat we just wrote. FIXME
  @bbat.map! do |b|
    b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b
  end

  # currently we use a loop. this could be better, but basically,
  # the act of writing out the bat, itself requires blocks which get
  # recorded in the bat.
  #
  # i'm sure that there'd be some simpler closed form solution to this. solve
  # recursive func:
  #
  #   num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
  #   bbat_len = initial_bbat_len + num_mbat_blocks
  #   mbat_len = ceil(bbat_len * 4 / block_size)
  #
  # the actual bbat allocation table is itself stored throughout the file, and that chain
  # is stored in the initial blocks, and the mbat blocks.
  num_mbat_blocks = 0
  io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
  # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
  # contiguous chunk at the end. the underlying io is truncated to match, so that freeing
  # trailing blocks actually shrinks the file.
  @bbat.truncate!
  @io.truncate @bbat.block_size * (@bbat.length + 1)
  while true
    # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
    # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
    # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
    # mbat must remain contiguous.
    bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
    # now storing the excess mbat blocks also increases the size of the bbat:
    new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil
    if new_num_mbat_blocks != num_mbat_blocks
      # need more space for the mbat.
      num_mbat_blocks = new_num_mbat_blocks
    elsif io.size != bbat_data_len
      # need more space for the bat
      # this may grow the bbat, depending on existing available blocks
      io.truncate bbat_data_len
    else
      break
    end
  end

  # now extract the info we want:
  ranges = io.ranges
  bbat_chain = @bbat.chain io.first_block
  io.close
  bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
  # tack on the mbat stuff
  @header.num_bat = bbat_chain.length
  mbat_blocks = (0...num_mbat_blocks).map do
    block = @bbat.free_block
    @bbat[block] = AllocationTable::META_BAT
    block
  end
  @header.mbat_start = mbat_blocks.first || AllocationTable::EOC

  # now finally write the bbat, using a not resizable io.
  # the mode here will be 'r', which allows write atm.
  RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }

  # this is the mbat. pad it out.
  bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
  @header.num_mbat = num_mbat_blocks
  if num_mbat_blocks != 0
    # write out the mbat blocks now. first of all, where are they going to be?
    mbat_data = bbat_chain[109..-1]
    # expand the mbat_data to include the linked list forward pointers.
    mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a.
      zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? a + [b] : a }
    # pad out the last one.
    mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length)))
    RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f|
      f.write mbat_data.flatten.pack('V*')
    end
  end

  # now seek back and write the header out
  @io.seek 0
  @io.write @header.to_s + bbat_chain[0, 109].pack('V*')
  @io.flush
end

# Resets this object to the equivalent of loading an empty ole document, and
# truncates the underlying io to zero length.
def clear
  Log.warn 'creating new ole storage object on non-writable io' unless @writeable
  @header = Header.new
  @bbat = AllocationTable::Big.new self
  @root = Dirent.new self, :type => :root, :name => 'Root Entry'
  @dirents = [@root]
  @root.idx = 0
  @sb_file.close if @sb_file
  @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
  @sbat = AllocationTable::Small.new self
  # throw everything else the hell away
  @io.truncate 0
end

# could be useful with mis-behaving ole documents. or to just clean them up.
# +temp+ selects the temporary backing store: :file (a Tempfile) or :mem (a StringIO).
# Raises ArgumentError for any other value.
def repack temp=:file
  case temp
  when :file
    Tempfile.open 'ole-repack' do |io|
      io.binmode
      repack_using_io io
    end
  when :mem;  StringIO.open('', &method(:repack_using_io))
  else raise ArgumentError, "unknown temp backing #{temp.inspect}"
  end
end

# Copies the current contents to +temp_io+, clears this storage, then copies the
# dirent tree back from a storage opened on +temp_io+.
def repack_using_io temp_io
  @io.rewind
  IO.copy @io, temp_io
  clear
  Storage.open temp_io, nil, @params do |temp_ole|
    #temp_ole.root.type = :dir
    Dirent.copy temp_ole.root, root
  end
end

# Returns the allocation table used for a stream of +size+ bytes: the bbat at or
# above the header threshold, otherwise the sbat.
def bat_for_size size
  # note >=, not > previously.
  size >= @header.threshold ? @bbat : @sbat
end

def inspect
  "#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>"
end

#
# A class which wraps the ole header
#
# Header.new can be both used to load from a string, or to create from
# defaults. Serialization is accomplished with the #to_s method.
#
# The struct members map one-to-one onto the PACK directives; the packed form is
# SIZE bytes long.
class Header < Struct.new(
  :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
  :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
  :sbat_start, :num_sbat, :mbat_start, :num_mbat
)
  PACK = 'a8 a16 v2 a2 v2 a6 V3 a4 V5'
  SIZE = 0x4c
  # i have seen it pointed out that the first 4 bytes of hex,
  # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
  # Built with pack so the constant is a binary string whatever the encoding of
  # this source file, and therefore compares equal to unpacked header data.
  MAGIC = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1].pack('C*') # expected value of Header#magic
  # the only byte order marker we accept (little endian). binary for the same reason.
  LITTLE_ENDIAN = [0xfe, 0xff].pack('C*')
  # what you get if creating new header from scratch.
  # AllocationTable::EOC isn't available yet. meh.
  EOC = 0xfffffffe
  DEFAULT = [
    MAGIC, 0.chr * 16, 59, 3, LITTLE_ENDIAN, 9, 6,
    0.chr * 6, 0, 1, EOC, 0.chr * 4,
    4096, EOC, 0, EOC, 0
  ]

  # +values+ is either a packed header string (only the leading SIZE bytes are
  # used) or an array of member values. Raises FormatError if validation fails.
  def initialize values=DEFAULT
    values = values.unpack(PACK) if String === values
    super(*values)
    validate!
  end

  # Serializes the header back to its SIZE byte packed form.
  def to_s
    to_a.pack PACK
  end

  # Raises FormatError for headers that cannot be handled, logs a warning for
  # merely suspicious ones, and returns true otherwise.
  def validate!
    raise FormatError, "OLE2 signature is invalid" unless magic == MAGIC
    # a short string unpacks to nil members, which the comparisons below can't handle
    raise FormatError, "OLE2 header is truncated" if to_a.include?(nil)
    if num_bat == 0 or # is that valid for a completely empty file?
       # not sure about this one. basically to do max possible bat given size of mbat
       num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or
       # shouldn't need to use the mbat as there is enough space in the header block
       num_bat < 109 && num_mbat != 0 or
       # given the size of the header is 76, if b_shift <= 6, blocks address the header.
       s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or
       # we only handle little endian
       byte_order != LITTLE_ENDIAN
      raise FormatError, "not valid OLE2 structured storage file"
    end
    # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
    # 3 for this value.
    # transacting_signature != "\x00" * 4 or
    if threshold != 4096 or
       num_mbat == 0 && mbat_start != EOC or
       reserved != "\x00" * 6
      Log.warn "may not be a valid OLE2 structured storage file"
    end
    true
  end
end

#
# +AllocationTable+'s hold the chains corresponding to files.
# Given an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
# the blocks that make up that file.
#
# There are 2 allocation tables, the bbat, and sbat, for big and small
# blocks respectively. The block chain should be loaded using either
# <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
# as appropriate.
#
# Whether or not big or small blocks are used for a file depends on
# whether its size is over the <tt>Header#threshold</tt> level.
#
# An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
# which are stored in blocks throughout the file. The blocks are either
# big or small, and are accessed using the <tt>AllocationTable</tt>.
#
# The bbat allocation table's data is stored in the spare room in the header
# block, and in extra blocks throughout the file as referenced by the meta
# bat. That chain is linear, as there is no higher level table.
#
# AllocationTable.new is used to create an empty table. It can parse a string
# with the #load method. Serialization is accomplished with the #to_s method.
#
class AllocationTable < Array
  # a free block (I don't currently leave any blocks free), although I do pad out
  # the allocation table with AVAIL to the block size.
  AVAIL = 0xffffffff
  EOC = 0xfffffffe # end of a chain
  # these blocks are used for storing the allocation table chains
  BAT = 0xfffffffd
  META_BAT = 0xfffffffc

  attr_reader :ole, :io, :block_size
  def initialize ole
    @ole = ole
    # true while the table may contain an AVAIL entry before its end (see #free_block)
    @sparse = true
    super()
  end

  # Replaces the table contents with the little endian 32 bit values in +data+.
  def load data
    replace data.unpack('V*')
  end

  # Returns a copy of the table with trailing AVAIL entries removed.
  def truncate
    # this strips trailing AVAILs. come to think of it, this has the potential to break
    # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
    # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
    # at load time.
    temp = reverse
    not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
    temp.reverse
  end

  def truncate!
    replace truncate
  end

  # Serializes the truncated table, padded with AVAIL to a whole number of big blocks.
  def to_s
    table = truncate
    # pad it out some
    num = @ole.bbat.block_size / 4
    # do you really use AVAIL? they probably extend past end of file, and may shortly
    # be used for the bat. not really good.
    table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
    table.pack 'V*'
  end

  # Follows the chain starting at block +idx+ and returns the block indices in order.
  # Raises FormatError if the chain leaves the table or loops back on itself.
  #
  # rewrote this to be non-recursive as it broke on a large attachment
  # chain with a stack error
  def chain idx
    a = []
    until idx >= META_BAT
      # valid indices are 0...length; self[length] would be nil
      raise FormatError, "broken allocationtable chain" if idx < 0 || idx >= length
      # a chain can visit each block at most once, so anything longer is a cycle
      raise FormatError, "broken allocationtable chain" if a.length >= length
      a << idx
      idx = self[idx]
    end
    Log.warn "invalid chain terminator #{idx}" unless idx == EOC
    a
  end

  # Turn a chain (an array given by +chain+) of blocks (optionally
  # truncated to +size+) into an array of arrays describing the stretches of
  # bytes in the file that it belongs to.
  #
  # The blocks are Big or Small blocks depending on the table type.
  def blocks_to_ranges chain, size=nil
    # truncate the chain if required
    chain = chain[0...(size.to_f / block_size).ceil] if size
    # convert chain to ranges of the block size
    ranges = chain.map { |i| [block_size * i, block_size] }
    # truncate final range if required
    ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
    ranges
  end

  # +chain+ is either a chain head (followed via #chain) or an array of blocks.
  def ranges chain, size=nil
    chain = self.chain(chain) unless Array === chain
    blocks_to_ranges chain, size
  end

  # quick shortcut. chain can be either a head (in which case the table is used to
  # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
  def open chain, size=nil, &block
    RangesIO.open @io, :ranges => ranges(chain, size), &block
  end

  def read chain, size=nil
    open chain, size, &:read
  end

  # catch any method that may add an AVAIL somewhere in the middle, thus invalidating
  # the @sparse speedup for free_block. annoying using eval, but define_method won't
  # work for this.
  # FIXME
  [:map!, :collect!].each do |name|
    eval <<-END
      def #{name}(*args, &block)
        @sparse = true
        super
      end
    END
  end

  def []= idx, val
    @sparse = true if val == AVAIL
    super
  end

  # Returns the index of an AVAIL entry, appending a new one if none exists.
  def free_block
    if @sparse
      i = index(AVAIL) and return i
    end
    @sparse = false
    push AVAIL
    length - 1
  end

  # Grows or shrinks the chain +blocks+ to hold +size+ bytes, updating the table.
  # modifies +blocks+ in place and returns it.
  def resize_chain blocks, size
    new_num_blocks = (size / block_size.to_f).ceil
    old_num_blocks = blocks.length
    if new_num_blocks < old_num_blocks
      # de-allocate some of our old blocks. TODO maybe zero them out in the file???
      (new_num_blocks...old_num_blocks).each { |i| self[blocks[i]] = AVAIL }
      self[blocks[new_num_blocks-1]] = EOC if new_num_blocks > 0
      blocks.slice! new_num_blocks..-1
    elsif new_num_blocks > old_num_blocks
      # need some more blocks.
      last_block = blocks.last
      (new_num_blocks - old_num_blocks).times do
        block = free_block
        # connect the chain. handle corner case of blocks being [] initially
        self[last_block] = block if last_block
        blocks << block
        last_block = block
        self[last_block] = EOC
      end
    end
    # update ranges, and return that also now
    blocks
  end

  class Big < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.b_shift
      @io = @ole.io
    end

    # Big blocks are kind of -1 based, in order to not clash with the header.
    def blocks_to_ranges blocks, size=nil
      super blocks.map { |b| b + 1 }, size
    end
  end

  class Small < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.s_shift
      @io = @ole.sb_file
    end
  end
end

# like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
# AllocationTable, and can be resized. used for read/write to 3 kinds of stream:
# 1. serialized dirent data
# 2. sbat table data
# 3. all dirents but through RangesIOMigrateable below
#
# Note that all internal access to first_block is through accessors, as it is sometimes
# useful to redirect it.
class RangesIOResizeable < RangesIO
  attr_reader   :bat
  attr_accessor :first_block
  # +params+ must include :first_block and may include :size. +mode+ may be
  # omitted, in which case the params hash is passed in its place.
  def initialize bat, mode='r', params={}
    mode, params = 'r', mode if Hash === mode
    first_block, size = params.values_at :first_block, :size
    raise ArgumentError, 'must specify first_block' unless first_block
    @bat = bat
    self.first_block = first_block
    # we now cache the blocks chain, for faster resizing.
    @blocks = @bat.chain first_block
    super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
  end

  # Resizes the stream to +size+ bytes, reallocating blocks in the bat as needed.
  def truncate size
    # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
    # merge_ranges function that merges sequential ranges into one as an optimization.
    @bat.resize_chain @blocks, size
    @ranges = @bat.ranges @blocks, size
    # keep the position inside the stream: clamp to the *new* size (@size still
    # holds the old one at this point).
    @pos = size if @pos > size
    self.first_block = @blocks.empty? ? AllocationTable::EOC : @blocks.first

    # don't know if this is required, but we explicitly request our @io to grow if necessary
    # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
    # can be made.
    # maybe its ok to just seek out there later??
    max = @ranges.map { |pos, len| pos + len }.max || 0
    @io.truncate max if max > @io.size

    @size = size
  end
end

# like RangesIOResizeable, but Ole::Storage::Dirent specific.
# provides for migration between bats based on size, and updating the dirent.
class RangesIOMigrateable < RangesIOResizeable
  attr_reader :dirent

  # Opens the stream described by +dirent+, using whichever bat suits its current size.
  def initialize dirent, mode='r'
    @dirent = dirent
    current_size = @dirent.size
    super @dirent.ole.bat_for_size(current_size), mode,
      :first_block => @dirent.first_block, :size => current_size
  end

  # Resizes the stream to +size+ bytes. When the new size belongs in the other
  # allocation table, the retained data is moved across. The dirent's size is
  # updated either way.
  def truncate size
    target_bat = @dirent.ole.bat_for_size size
    if target_bat.class == @bat.class
      # same table before and after: a plain resize will do
      super
    else
      # bat migration needed! we need to backup some data. the amount of data
      # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
      saved_pos = @pos
      @pos = 0
      kept_len = [@size, size].min
      kept = read kept_len
      # this does a normal truncate to 0, removing our presence from the old bat, and
      # rewrite the dirent's first_block
      super 0
      # just change the bat and the underlying io from right under everyone :)
      @bat = target_bat
      @io = target_bat.io
      # important to do this now, before the write. as the below write will always
      # migrate us back to sbat! this will now allocate us +size+ in the new bat.
      super
      @pos = 0
      write kept
      @pos = saved_pos
    end
    # now just update the file
    @dirent.size = size
  end

  # first_block is not stored here; both accessors are forwarded to the dirent
  def first_block
    @dirent.first_block
  end

  def first_block= val
    @dirent.first_block = val
  end
end

#
# A class which wraps an ole directory entry. Can be either a directory
# (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
#
# Most interaction with <tt>Ole::Storage</tt> is through this class.
# The 2 most important functions are <tt>Dirent#children</tt>, and
# <tt>Dirent#data</tt>.
#
# was considering separate classes for dirs and files. some methods/attrs only
# applicable to one or the other.
#
# As with the other classes, #to_s performs the serialization.
- # - class Dirent < Struct.new( - :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child, - :clsid, :flags, # dirs only - :create_time_str, :modify_time_str, # files only - :first_block, :size, :reserved - ) - include RecursivelyEnumerable - - PACK = 'a64 v C C V3 a16 V a8 a8 V2 a4' - SIZE = 128 - TYPE_MAP = { - # this is temporary - 0 => :empty, - 1 => :dir, - 2 => :file, - 5 => :root - } - # something to do with the fact that the tree is supposed to be red-black - COLOUR_MAP = { - 0 => :red, - 1 => :black - } - # used in the next / prev / child stuff to show that the tree ends here. - # also used for first_block for directory. - EOT = 0xffffffff - DEFAULT = [ - 0.chr * 2, 2, 0, # will get overwritten - 1, EOT, EOT, EOT, - 0.chr * 16, 0, nil, nil, - AllocationTable::EOC, 0, 0.chr * 4 - ] - - # i think its just used by the tree building - attr_accessor :idx - # This returns all the children of this +Dirent+. It is filled in - # when the tree structure is recreated. - attr_accessor :children - attr_accessor :name - attr_reader :ole, :type, :create_time, :modify_time - def initialize ole, values=DEFAULT, params={} - @ole = ole - values, params = DEFAULT, values if Hash === values - values = values.unpack(PACK) if String === values - super(*values) - - # extra parsing from the actual struct values - @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len]) - @type = if params[:type] - unless TYPE_MAP.values.include?(params[:type]) - raise ArgumentError, "unknown type #{params[:type].inspect}" - end - params[:type] - else - TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}" - end - - # further extra type specific stuff - if file? - default_time = @ole.params[:update_timestamps] ? 
Time.now : nil - @create_time ||= default_time - @modify_time ||= default_time - @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str - @modify_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if modify_time_str - @children = nil - else - @create_time = nil - @modify_time = nil - self.size = 0 unless @type == :root - @children = [] - end - - # to silence warnings. used for tree building at load time - # only. - @idx = nil - end - - def open mode='r' - raise Errno::EISDIR unless file? - io = RangesIOMigrateable.new self, mode - # TODO work on the mode string stuff a bit more. - # maybe let the io object know about the mode, so it can refuse - # to work for read/write appropriately. maybe redefine all unusable - # methods using singleton class to throw errors. - # for now, i just want to implement truncation on use of 'w'. later, - # i need to do 'a' etc. - case mode - when 'r', 'r+' - # as i don't enforce reading/writing, nothing changes here. kind of - # need to enforce tt if i want modify times to work better. - @modify_time = Time.now if mode == 'r+' - when 'w' - @modify_time = Time.now - # io.truncate 0 - #else - # raise NotImplementedError, "unsupported mode - #{mode.inspect}" - end - if block_given? - begin yield io - ensure; io.close - end - else io - end - end - - def read limit=nil - open { |io| io.read limit } - end - - def file? - type == :file - end - - def dir? - # to count root as a dir. - !file? - end - - # maybe need some options regarding case sensitivity. - def / name - children.find { |child| name === child.name } - end - - def [] idx - if String === idx - #warn 'String form of Dirent#[] is deprecated' - self / idx - else - super - end - end - - # move to ruby-msg. and remove from here - def time - #warn 'Dirent#time is deprecated' - create_time || modify_time - end - - def each_child(&block) - @children.each(&block) - end - - # flattens the tree starting from here into +dirents+. 
note it modifies its argument. - def flatten dirents=[] - @idx = dirents.length - dirents << self - if file? - self.prev = self.next = self.child = EOT - else - children.each { |child| child.flatten dirents } - self.child = Dirent.flatten_helper children - end - dirents - end - - # i think making the tree structure optimized is actually more complex than this, and - # requires some intelligent ordering of the children based on names, but as long as - # it is valid its ok. - # actually, i think its ok. gsf for example only outputs a singly-linked-list, where - # prev is always EOT. - def self.flatten_helper children - return EOT if children.empty? - i = children.length / 2 - this = children[i] - this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] } - this.idx - end - - def to_s - tmp = Types::Variant.dump(Types::VT_LPWSTR, name) - tmp = tmp[0, 62] if tmp.length > 62 - tmp += 0.chr * 2 - self.name_len = tmp.length - self.name_utf16 = tmp + 0.chr * (64 - tmp.length) - # type_id can perhaps be set in the initializer, as its read only now. - self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first - # for the case of files, it is assumed that that was handled already - # note not dir?, so as not to override root's first_block - self.first_block = Dirent::EOT if type == :dir - if file? - # this is messed up. it changes the time stamps regardless of whether the file - # was actually touched. instead, any open call with a writeable mode, should update - # the modify time. create time would be set in new. - if @ole.params[:update_timestamps] - self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time - self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time - end - else - self.create_time_str = 0.chr * 8 - self.modify_time_str = 0.chr * 8 - end - to_a.pack PACK - end - - def inspect - str = "#<Dirent:#{name.inspect}" - # perhaps i should remove the data snippet. its not that useful anymore. 
- # there is also some dir specific stuff. like clsid, flags, that i should - # probably include - if file? - tmp = read 9 - data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp - str << " size=#{size}" + - "#{modify_time ? ' modify_time=' + modify_time.to_s.inspect : nil}" + - " data=#{data.inspect}" - end - str + '>' - end - - def delete child - # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone - raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child - # free our blocks - child.open { |io| io.truncate 0 } - end - - def self.copy src, dst - # copies the contents of src to dst. must be the same type. this will throw an - # error on copying to root. maybe this will recurse too much for big documents?? - raise ArgumentError, 'differing types' if src.file? and !dst.file? - dst.name = src.name - if src.dir? - src.children.each do |src_child| - dst_child = Dirent.new dst.ole, :type => src_child.type - dst.children << dst_child - Dirent.copy src_child, dst_child - end - else - src.open do |src_io| - dst.open { |dst_io| IO.copy src_io, dst_io } - end - end - end - end - end -end - diff --git a/vendor/ruby-ole/lib/ole/storage/file_system.rb b/vendor/ruby-ole/lib/ole/storage/file_system.rb deleted file mode 100644 index 531f1ba11..000000000 --- a/vendor/ruby-ole/lib/ole/storage/file_system.rb +++ /dev/null @@ -1,423 +0,0 @@ -# -# = Introduction -# -# This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>. -# -# = TODO -# -# - need to implement some more IO functions on RangesIO, like #puts, #print -# etc, like AbstractOutputStream from zipfile. -# -# - check Dir.mkdir, and File.open, and File.rename, to add in filename -# length checks (max 32 / 31 or something). -# do the automatic truncation, and add in any necessary warnings. 
-# -# - File.split('a/') == File.split('a') == ['.', 'a'] -# the implication of this, is that things that try to force directory -# don't work. like, File.rename('a', 'b'), should work if a is a file -# or directory, but File.rename('a/', 'b') should only work if a is -# a directory. tricky, need to clean things up a bit more. -# i think a general path name => dirent method would work, with flags -# about what should raise an error. -# -# - Need to look at streamlining things after getting all the tests passing, -# as this file's getting pretty long - almost half the real implementation. -# and is probably more inefficient than necessary. -# too many exceptions in the expected path of certain functions. -# -# - should look at profiles before and after switching ruby-msg to use -# the filesystem api. -# - -require 'ole/storage' - -module Ole # :nodoc: - class Storage - def file - @file ||= FileClass.new self - end - - def dir - @dir ||= DirClass.new self - end - - # tries to get a dirent for path. return nil if it doesn't exist - # (change it) - def dirent_from_path path - dirent = @root - path = file.expand_path path - path = path.sub(/^\/*/, '').sub(/\/*$/, '').split(/\/+/) - until path.empty? - return nil if dirent.file? - return nil unless dirent = dirent/path.shift - end - dirent - end - - class FileClass - class Stat - attr_reader :ftype, :size, :blocks, :blksize - attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino - def initialize dirent - @dirent = dirent - @size = dirent.size - if file? - @ftype = 'file' - bat = dirent.ole.bat_for_size(dirent.size) - @blocks = bat.chain(dirent.first_block).length - @blksize = bat.block_size - else - @ftype = 'directory' - @blocks = 0 - @blksize = 0 - end - # a lot of these are bogus. ole file format has no analogs - @nlink = 1 - @uid, @gid = 0, 0 - @dev, @rdev = 0, 0 - @ino = 0 - # need to add times - atime, mtime, ctime. - end - - alias rdev_major :rdev - alias rdev_minor :rdev - - def file? - @dirent.file? 
- end - - def directory? - @dirent.dir? - end - - def size? - size if file? - end - - def inspect - pairs = (instance_variables - ['@dirent']).map do |n| - "#{n[1..-1]}=#{instance_variable_get n}" - end - "#<#{self.class} #{pairs * ', '}>" - end - end - - def initialize ole - @ole = ole - end - - def expand_path path - # get the raw stored pwd value (its blank for root) - pwd = @ole.dir.instance_variable_get :@pwd - # its only absolute if it starts with a '/' - path = "#{pwd}/#{path}" unless path =~ /^\// - # at this point its already absolute. we use File.expand_path - # just for the .. and . handling - # No longer use RUBY_PLATFORM =~ /win/ as it matches darwin. better way? - File.expand_path(path)[File::ALT_SEPARATOR == "\\" ? (2..-1) : (0..-1)] - end - - # +orig_path+ is just so that we can use the requested path - # in the error messages even if it has been already modified - def dirent_from_path path, orig_path=nil - orig_path ||= path - dirent = @ole.dirent_from_path path - raise Errno::ENOENT, orig_path unless dirent - raise Errno::EISDIR, orig_path if dirent.dir? - dirent - end - private :dirent_from_path - - def exists? path - !!@ole.dirent_from_path(path) - end - alias exist? :exists? - - def file? path - dirent = @ole.dirent_from_path path - dirent and dirent.file? - end - - def directory? path - dirent = @ole.dirent_from_path path - dirent and dirent.dir? - end - - def open path, mode='r', &block - if IO::Mode.new(mode).create? - begin - dirent = dirent_from_path path - rescue Errno::ENOENT - # maybe instead of repeating this everywhere, i should have - # a get_parent_dirent function. 
- parent_path, basename = File.split expand_path(path) - parent = @ole.dir.send :dirent_from_path, parent_path, path - parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename) - end - else - dirent = dirent_from_path path - end - dirent.open mode, &block - end - - # explicit wrapper instead of alias to inhibit block - def new path, mode='r' - open path, mode - end - - def size path - dirent_from_path(path).size - rescue Errno::EISDIR - # kind of arbitrary. I'm getting 4096 from ::File, but - # the zip tests want 0. - 0 - end - - def size? path - dirent_from_path(path).size - # any other exceptions i need to rescue? - rescue Errno::ENOENT, Errno::EISDIR - nil - end - - def stat path - # we do this to allow dirs. - dirent = @ole.dirent_from_path path - raise Errno::ENOENT, path unless dirent - Stat.new dirent - end - - def read path - open path, &:read - end - - # most of the work this function does is moving the dirent between - # 2 parents. the actual name changing is quite simple. - # File.rename can move a file into another folder, which is why i've - # done it too, though i think its not always possible... - # - # FIXME File.rename can be used for directories too.... - def rename from_path, to_path - # check what we want to rename from exists. do it this - # way to allow directories. - dirent = @ole.dirent_from_path from_path - raise Errno::ENOENT, from_path unless dirent - # delete what we want to rename to if necessary - begin - unlink to_path - rescue Errno::ENOENT - # we actually get here, but rcov doesn't think so. add 1 + 1 to - # keep rcov happy for now... 
:) - 1 + 1 - end - # reparent the dirent - from_parent_path, from_basename = File.split expand_path(from_path) - to_parent_path, to_basename = File.split expand_path(to_path) - from_parent = @ole.dir.send :dirent_from_path, from_parent_path, from_path - to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path - from_parent.children.delete dirent - # and also change its name - dirent.name = to_basename - to_parent.children << dirent - 0 - end - - # crappy copy from Dir. - def unlink(*paths) - paths.each do |path| - dirent = @ole.dirent_from_path path - # i think we should free all of our blocks from the - # allocation table. - # i think if you run repack, all free blocks should get zeroed, - # but currently the original data is there unmodified. - open(path) { |f| f.truncate 0 } - # remove ourself from our parent, so we won't be part of the dir - # tree at save time. - parent_path, basename = File.split expand_path(path) - parent = @ole.dir.send :dirent_from_path, parent_path, path - parent.children.delete dirent - end - paths.length # hmmm. as per ::File ? - end - alias delete :unlink - end - - # - # an *instance* of this class is supposed to provide similar methods - # to the class methods of Dir itself. - # - # pretty complete. like zip/zipfilesystem's implementation, i provide - # everything except chroot and glob. glob could be done with a glob - # to regex regex, and then simply match in the entries array... although - # recursive glob complicates that somewhat. - # - # Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list. - class DirClass - def initialize ole - @ole = ole - @pwd = '' - end - - # +orig_path+ is just so that we can use the requested path - # in the error messages even if it has been already modified - def dirent_from_path path, orig_path=nil - orig_path ||= path - dirent = @ole.dirent_from_path path - raise Errno::ENOENT, orig_path unless dirent - raise Errno::ENOTDIR, orig_path unless dirent.dir? 
- dirent - end - private :dirent_from_path - - def open path - dir = Dir.new path, entries(path) - if block_given? - yield dir - else - dir - end - end - - # as for file, explicit alias to inhibit block - def new path - open path - end - - # pwd is always stored without the trailing slash. we handle - # the root case here - def pwd - if @pwd.empty? - '/' - else - @pwd - end - end - alias getwd :pwd - - def chdir orig_path - # make path absolute, squeeze slashes, and remove trailing slash - path = @ole.file.expand_path(orig_path).gsub(/\/+/, '/').sub(/\/$/, '') - # this is just for the side effects of the exceptions if invalid - dirent_from_path path, orig_path - if block_given? - old_pwd = @pwd - begin - @pwd = path - yield - ensure - @pwd = old_pwd - end - else - @pwd = path - 0 - end - end - - def entries path - dirent = dirent_from_path path - # Not sure about adding on the dots... - entries = %w[. ..] + dirent.children.map(&:name) - # do some checks about un-reachable files - seen = {} - entries.each do |n| - Log.warn "inaccessible file (filename contains slash) - #{n.inspect}" if n['/'] - Log.warn "inaccessible file (duplicate filename) - #{n.inspect}" if seen[n] - seen[n] = true - end - entries - end - - def foreach path, &block - entries(path).each(&block) - end - - # there are some other important ones, like: - # chroot (!), glob etc etc. for now, i think - def mkdir path - # as for rmdir below: - parent_path, basename = File.split @ole.file.expand_path(path) - # note that we will complain about the full path despite accessing - # the parent path. this is consistent with ::Dir - parent = dirent_from_path parent_path, path - # now, we first should ensure that it doesn't already exist - # either as a file or a directory. - raise Errno::EEXIST, path if parent/basename - parent.children << Dirent.new(@ole, :type => :dir, :name => basename) - 0 - end - - def rmdir path - dirent = dirent_from_path path - raise Errno::ENOTEMPTY, path unless dirent.children.empty? 
- - # now delete it, how to do that? the canonical representation that is - # maintained is the root tree, and the children array. we must remove it - # from the children array. - # we need the parent then. this sucks but anyway: - # we need to split the path. but before we can do that, we need - # to expand it first. eg. say we need the parent to unlink - # a/b/../c. the parent should be a, not a/b/.., or a/b. - parent_path, basename = File.split @ole.file.expand_path(path) - # this shouldn't be able to fail if the above didn't - parent = dirent_from_path parent_path - # note that the way this currently works, on save and repack time this will get - # reflected. to work properly, ie to make a difference now it would have to re-write - # the dirent. i think that Ole::Storage#close will handle that. and maybe include a - # #repack. - parent.children.delete dirent - 0 # hmmm. as per ::Dir ? - end - alias delete :rmdir - alias unlink :rmdir - - # note that there is nothing remotely ole specific about - # this class. it simply provides the dir like sequential access - # methods on top of an array. - # hmm, doesn't throw the IOError's on use of a closed directory... 
- class Dir - include Enumerable - - attr_reader :path - def initialize path, entries - @path, @entries, @pos = path, entries, 0 - @closed = false - end - - def pos - raise IOError if @closed - @pos - end - - def each(&block) - raise IOError if @closed - @entries.each(&block) - end - - def close - @closed = true - end - - def read - raise IOError if @closed - @entries[pos] - ensure - @pos += 1 if pos < @entries.length - end - - def pos= pos - raise IOError if @closed - @pos = [[0, pos].max, @entries.length].min - end - - def rewind - raise IOError if @closed - @pos = 0 - end - - alias tell :pos - alias seek :pos= - end - end - end -end - diff --git a/vendor/ruby-ole/lib/ole/storage/meta_data.rb b/vendor/ruby-ole/lib/ole/storage/meta_data.rb deleted file mode 100644 index be84037df..000000000 --- a/vendor/ruby-ole/lib/ole/storage/meta_data.rb +++ /dev/null @@ -1,148 +0,0 @@ -require 'ole/types/property_set' - -module Ole - class Storage - # - # The MetaData class is designed to be high level interface to all the - # underlying meta data stored within different sections, themselves within - # different property set streams. - # - # With this class, you can simply get properties using their names, without - # needing to know about the underlying guids, property ids etc. - # - # Example: - # - # Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author } - # - # TODO: - # - # * add write support - # * fix some of the missing type coercion (eg FileTime) - # * maybe add back the ability to access individual property sets as a unit - # directly. ie <tt>ole.summary_information</tt>. Is this useful? - # * full key support, for unknown keys, like - # <tt>ole.meta_data[myguid, myid]</tt>. probably needed for user-defined - # properties too. 
- # - class MetaData - include Enumerable - - FILE_MAP = { - Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation", - Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation" - } - - FORMAT_MAP = { - 'MSWordDoc' => :doc - } - - CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}" - CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}" - CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}" - CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}" - - CLSID_MAP = { - CLSID_EXCEL97 => :xls, - CLSID_EXCEL95 => :xls, - CLSID_WORD97 => :doc, - CLSID_WORD95 => :doc - } - - MIME_TYPES = { - :xls => 'application/vnd.ms-excel', - :doc => 'application/msword', - :ppt => 'application/vnd.ms-powerpoint', - # not registered at IANA, but seems most common usage - :msg => 'application/vnd.ms-outlook', - # this is my default fallback option. also not registered at IANA. - # file(1)'s default is application/msword, which is useless... 
- nil => 'application/x-ole-storage' - } - - def initialize ole - @ole = ole - end - - # i'm thinking of making file_format and mime_type available through - # #[], #each, and #to_h also, as calculated meta data (not assignable) - - def comp_obj - return {} unless dirent = @ole.root["\001CompObj"] - data = dirent.read - # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html - # compobj_version: 0x0001 - # byte_order: 0xffe - # windows_version: 0x00000a03 (win31 apparently) - # marker: 0xffffffff - compobj_version, byte_order, windows_version, marker, clsid = - data.unpack("vvVVa#{Types::Clsid::SIZE}") - strings = [] - i = 28 - while i < data.length - len = data[i, 4].unpack('V').first - i += 4 - strings << data[i, len - 1] - i += len - end - # in the unknown chunk, you usually see something like 'Word.Document.6' - {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]} - end - private :comp_obj - - def file_format - comp_obj[:file_format] - end - - def mime_type - # based on the CompObj stream contents - type = FORMAT_MAP[file_format] - return MIME_TYPES[type] if type - - # based on the root clsid - type = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)] - return MIME_TYPES[type] if type - - # fallback to heuristics - has_file = Hash[*@ole.root.children.map { |d| [d.name.downcase, true] }.flatten] - return MIME_TYPES[:msg] if has_file['__nameid_version1.0'] or has_file['__properties_version1.0'] - return MIME_TYPES[:doc] if has_file['worddocument'] or has_file['document'] - return MIME_TYPES[:xls] if has_file['workbook'] or has_file['book'] - - MIME_TYPES[nil] - end - - def [] key - pair = Types::PropertySet::PROPERTY_MAP[key.to_s] or return nil - file = FILE_MAP[pair.first] or return nil - dirent = @ole.root[file] or return nil - dirent.open { |io| return Types::PropertySet.new(io)[key] } - end - - def []= key, value - raise NotImplementedError, 'meta data writes not implemented' - end - - def each(&block) - 
FILE_MAP.values.each do |file| - dirent = @ole.root[file] or next - dirent.open { |io| Types::PropertySet.new(io).each(&block) } - end - end - - def to_h - inject({}) { |hash, (name, value)| hash.update name.to_sym => value } - end - - def method_missing name, *args, &block - return super unless args.empty? - pair = Types::PropertySet::PROPERTY_MAP[name.to_s] or return super - self[name] - end - end - - def meta_data - @meta_data ||= MetaData.new(self) - end - end -end - diff --git a/vendor/ruby-ole/lib/ole/support.rb b/vendor/ruby-ole/lib/ole/support.rb deleted file mode 100644 index bbb0bbe68..000000000 --- a/vendor/ruby-ole/lib/ole/support.rb +++ /dev/null @@ -1,256 +0,0 @@ -# -# A file with general support functions used by most files in the project. -# -# These are the only methods added to other classes. -# - -require 'logger' -require 'stringio' -require 'enumerator' - -class String # :nodoc: - # plural of String#index. returns all offsets of +string+. rename to indices? - # - # note that it doesn't check for overlapping values. - def indexes string - # in some ways i'm surprised that $~ works properly in this case... - to_enum(:scan, /#{Regexp.quote string}/m).map { $~.begin 0 } - end - - def each_chunk size - (length / size.to_f).ceil.times { |i| yield self[i * size, size] } - end -end - -class File # :nodoc: - # for interface consistency with StringIO etc (rather than adding #stat - # to them). used by RangesIO. - def size - stat.size - end -end - -class Symbol # :nodoc: - unless :x.respond_to? :to_proc - def to_proc - proc { |a| a.send self } - end - end -end - -module Enumerable # :nodoc: - unless [].respond_to? :group_by - # 1.9 backport - def group_by - hash = Hash.new { |h, key| h[key] = [] } - each { |item| hash[yield(item)] << item } - hash - end - end - - unless [].respond_to? :sum - def sum initial=0 - inject(initial) { |a, b| a + b } - end - end -end - -# move to support? 
-class IO # :nodoc: - # Copy data from IO-like object +src+, to +dst+ - def self.copy src, dst - until src.eof? - buf = src.read(4096) - dst.write buf - end - end -end - -class Logger # :nodoc: - # A helper method for creating a +Logger+ which produce call stack - # in their output - def self.new_with_callstack logdev=STDERR - log = Logger.new logdev - log.level = WARN - log.formatter = proc do |severity, time, progname, msg| - # find where we were called from, in our code - callstack = caller.dup - callstack.shift while callstack.first =~ /\/logger\.rb:\d+:in/ - from = callstack.first.sub(/:in `(.*?)'/, ":\\1") - "[%s %s]\n%-7s%s\n" % [time.strftime('%H:%M:%S'), from, severity, msg.to_s] - end - log - end -end - -# Include this module into a class that defines #each_child. It should -# maybe use #each instead, but its easier to be more specific, and use -# an alias. -# -# I don't want to force the class to cache children (eg where children -# are loaded on request in pst), because that forces the whole tree to -# be loaded. So, the methods should only call #each_child once, and -# breadth first iteration holds its own copy of the children around. -# -# Main methods are #recursive, and #to_tree -module RecursivelyEnumerable # :nodoc: - def each_recursive_depth_first(&block) - each_child do |child| - yield child - if child.respond_to? :each_recursive_depth_first - child.each_recursive_depth_first(&block) - end - end - end - - # don't think this is actually a proper breadth first recursion. only first - # level is breadth first. - def each_recursive_breadth_first(&block) - children = [] - each_child do |child| - children << child if child.respond_to? 
:each_recursive_breadth_first - yield child - end - children.each { |child| child.each_recursive_breadth_first(&block) } - end - - def each_recursive mode=:depth_first, &block - # we always actually yield ourself (the tree root) before recursing - yield self - send "each_recursive_#{mode}", &block - end - - # the idea of this function, is to allow use of regular Enumerable methods - # in a recursive fashion. eg: - # - # # just looks at top level children - # root.find { |child| child.some_condition? } - # # recurse into all children getting non-folders, breadth first - # root.recursive(:breadth_first).select { |child| !child.folder? } - # # just get everything - # items = root.recursive.to_a - # - def recursive mode=:depth_first - to_enum(:each_recursive, mode) - end - - # streams a "tree" form of the recursively enumerable structure to +io+, or - # return a string form instead if +io+ is not specified. - # - # mostly a debugging aid. can specify a different block which will be called - # to provide the string form for each node. 
- def to_tree io='', &inspect - inspect ||= :inspect.to_proc - io << "- #{inspect[self]}\n" - recurse = proc do |node, prefix| - child = nil - node.each_child do |next_child| - if child - io << "#{prefix}|- #{inspect[child]}\n" - recurse.call child, prefix + '| ' - end - child = next_child - end if node.respond_to?(:each_child) - if child - io << "#{prefix}\\- #{inspect[child]}\n" - recurse.call child, prefix + ' ' - end - end - recurse.call self, ' ' - io - end -end - -# can include File::Constants -class IO - # this is for jruby - include File::Constants unless defined?(RDONLY) - - # nabbed from rubinius, and modified - def self.parse_mode mode - ret = 0 - - case mode[0, 1] - when 'r'; ret |= RDONLY - when 'w'; ret |= WRONLY | CREAT | TRUNC - when 'a'; ret |= WRONLY | CREAT | APPEND - else raise ArgumentError, "illegal access mode #{mode}" - end - - (1...mode.length).each do |i| - case mode[i, 1] - when '+'; ret = (ret & ~(RDONLY | WRONLY)) | RDWR - when 'b'; ret |= Mode::BINARY - else raise ArgumentError, "illegal access mode #{mode}" - end - end - - ret - end - - class Mode - # ruby 1.9 defines binary as 0, which isn't very helpful. - # its 4 in rubinius. no longer using - # - # BINARY = 0x4 unless defined?(BINARY) - # - # for that reason, have my own constants module here - module Constants - include File::Constants - BINARY = 0x4 - end - - include Constants - NAMES = %w[rdonly wronly rdwr creat trunc append binary] - - attr_reader :flags - def initialize flags - flags = IO.parse_mode flags.to_str if flags.respond_to? :to_str - raise ArgumentError, "invalid flags - #{flags.inspect}" unless Fixnum === flags - @flags = flags - end - - def writeable? - #(@flags & RDONLY) == 0 - (@flags & 0x3) != RDONLY - end - - def readable? - (@flags & WRONLY) == 0 - end - - def truncate? - (@flags & TRUNC) != 0 - end - - def append? - (@flags & APPEND) != 0 - end - - def create? - (@flags & CREAT) != 0 - end - - def binary? 
- (@flags & BINARY) != 0 - end - -=begin - # revisit this - def apply io - if truncate? - io.truncate 0 - elsif append? - io.seek IO::SEEK_END, 0 - end - end -=end - - def inspect - names = NAMES.map { |name| name if (flags & Mode.const_get(name.upcase)) != 0 } - names.unshift 'rdonly' if (flags & 0x3) == 0 - "#<#{self.class} #{names.compact * '|'}>" - end - end -end - diff --git a/vendor/ruby-ole/lib/ole/types.rb b/vendor/ruby-ole/lib/ole/types.rb deleted file mode 100644 index 95616927a..000000000 --- a/vendor/ruby-ole/lib/ole/types.rb +++ /dev/null @@ -1,2 +0,0 @@ -require 'ole/types/base' -require 'ole/types/property_set' diff --git a/vendor/ruby-ole/lib/ole/types/base.rb b/vendor/ruby-ole/lib/ole/types/base.rb deleted file mode 100644 index 31e7b24e9..000000000 --- a/vendor/ruby-ole/lib/ole/types/base.rb +++ /dev/null @@ -1,251 +0,0 @@ -require 'iconv' -require 'date' - -require 'ole/base' - -module Ole # :nodoc: - # - # The Types module contains all the serialization and deserialization code for standard ole - # types. - # - # It also defines all the variant type constants, and symbolic names. - # - module Types - # for anything that we don't have serialization code for - class Data < String - def self.load str - new str - end - - def self.dump str - str.to_s - end - end - - class Lpstr < String - def self.load str - # not sure if its always there, but there is often a trailing - # null byte. - new str.chomp(0.chr) - end - - def self.dump str - # do i need to append the null byte? - str.to_s - end - end - - # for VT_LPWSTR - class Lpwstr < String - FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le' - TO_UTF16 = Iconv.new 'utf-16le', 'utf-8' - - def self.load str - new FROM_UTF16.iconv(str).chomp(0.chr) - end - - def self.dump str - # need to append nulls? - data = TO_UTF16.iconv str - # not sure if this is the recommended way to do it, but I want to treat - # the resulting utf16 data as regular bytes, not characters. 
- data.force_encoding Encoding::US_ASCII if data.respond_to? :encoding - data - end - end - - # for VT_FILETIME - class FileTime < DateTime - SIZE = 8 - EPOCH = new 1601, 1, 1 - - # Create a +DateTime+ object from a struct +FILETIME+ - # (http://msdn2.microsoft.com/en-us/library/ms724284.aspx). - # - # Converts +str+ to two 32 bit time values, comprising the high and low 32 bits of - # the 100's of nanoseconds since 1st january 1601 (Epoch). - def self.load str - low, high = str.to_s.unpack 'V2' - # we ignore these, without even warning about it - return nil if low == 0 and high == 0 - # switched to rational, and fixed the off by 1 second error i sometimes got. - # time = EPOCH + (high * (1 << 32) + low) / 1e7 / 86400 rescue return - # use const_get to ensure we can return anything which subclasses this (VT_DATE?) - const_get('EPOCH') + Rational(high * (1 << 32) + low, 1e7.to_i * 86400) rescue return - # extra sanity check... - #unless (1800...2100) === time.year - # Log.warn "ignoring unlikely time value #{time.to_s}" - # return nil - #end - #time - end - - # +time+ should be able to be either a Time, Date, or DateTime. - def self.dump time - # i think i'll convert whatever i get to be a datetime, because of - # the covered range. - return 0.chr * SIZE unless time - time = time.send(:to_datetime) if Time === time - # don't bother to use const_get here - bignum = (time - EPOCH) * 86400 * 1e7.to_i - high, low = bignum.divmod 1 << 32 - [low, high].pack 'V2' - end - - def inspect - "#<#{self.class} #{to_s}>" - end - end - - # for VT_CLSID - # Unlike most of the other conversions, the Guid's are serialized/deserialized by actually - # doing nothing! (eg, _load & _dump are null ops) - # Rather, its just a string with a different inspect string, and it includes a - # helper method for creating a Guid from that readable form (#format). 
class Clsid < String
  # size in bytes of a packed guid
  SIZE = 16
  # pack template: 32 bit, 16 bit, 16 bit little endian fields, then 8 single bytes
  PACK = 'V v v CC C6'

  # Wrap already packed guid data. No conversion takes place.
  def self.load str
    new str.to_s
  end

  # Return the packed form of +guid+.
  #
  # * nil / false gives SIZE zero bytes.
  # * data that is exactly SIZE bytes long is taken to be packed already and
  #   is returned untouched.
  # * a plain string in the readable dashed form is parsed (and raises
  #   ArgumentError when malformed).
  # * anything else is returned as is.
  def self.dump guid
    return 0.chr * SIZE unless guid
    # the size has to be checked before looking for a dash: packed data may
    # well contain the byte 0x2d ('-'), and must not be mistaken for the
    # readable form, which is always longer than SIZE.
    return guid if guid.bytesize == SIZE
    # allow use of plain strings in place of guids.
    guid['-'] ? parse(guid) : guid
  end

  # Build a Clsid from the readable form, eg
  # "{00020906-0000-0000-c000-000000000046}". Braces and case are optional.
  # Raises ArgumentError unless the result formats back to the input.
  def self.parse str
    vals = str.scan(/[a-f\d]+/i).map(&:hex)
    if vals.length == 5
      # this is pretty ugly: the last two groups are stored as single bytes,
      # so they are split up into pairs of hex digits
      vals[3] = ('%04x' % vals[3]).scan(/../).map(&:hex)
      vals[4] = ('%012x' % vals[4]).scan(/../).map(&:hex)
      guid = new vals.flatten.pack(PACK)
      return guid if guid.format.delete('{}') == str.downcase.delete('{}')
    end
    raise ArgumentError, 'invalid guid - %p' % str
  end

  # The readable dashed form, lowercase and without braces.
  def format
    "%08x-%04x-%04x-%02x%02x-#{'%02x' * 6}" % unpack(PACK)
  end

  def inspect
    "#<#{self.class}:{#{format}}>"
  end
end

#
# The OLE variant types, extracted from
# http://www.marin.clara.net/COM/variant_type_definitions.htm.
#
# A subset is also in WIN32OLE::VARIANT, but its not cross platform (obviously).
#
# Use like:
#
#   p Ole::Types::Variant::NAMES[0x001f] => 'VT_LPWSTR'
#   p Ole::Types::VT_DATE # => 7
#
# The serialization / deserialization functions should be fixed to make it easier
# to work with. like
#
#   Ole::Types.from_str(VT_DATE, data) # and
#   Ole::Types.to_str(VT_DATE, data)
#
# Or similar, rather than having to do VT_* <=> ad hoc class name etc as it is
# currently.
#
module Variant
  # variant type number => symbolic name
  NAMES = {
    0x0000 => 'VT_EMPTY',
    0x0001 => 'VT_NULL',
    0x0002 => 'VT_I2',
    0x0003 => 'VT_I4',
    0x0004 => 'VT_R4',
    0x0005 => 'VT_R8',
    0x0006 => 'VT_CY',
    0x0007 => 'VT_DATE',
    0x0008 => 'VT_BSTR',
    0x0009 => 'VT_DISPATCH',
    0x000a => 'VT_ERROR',
    0x000b => 'VT_BOOL',
    0x000c => 'VT_VARIANT',
    0x000d => 'VT_UNKNOWN',
    0x000e => 'VT_DECIMAL',
    0x0010 => 'VT_I1',
    0x0011 => 'VT_UI1',
    0x0012 => 'VT_UI2',
    0x0013 => 'VT_UI4',
    0x0014 => 'VT_I8',
    0x0015 => 'VT_UI8',
    0x0016 => 'VT_INT',
    0x0017 => 'VT_UINT',
    0x0018 => 'VT_VOID',
    0x0019 => 'VT_HRESULT',
    0x001a => 'VT_PTR',
    0x001b => 'VT_SAFEARRAY',
    0x001c => 'VT_CARRAY',
    0x001d => 'VT_USERDEFINED',
    0x001e => 'VT_LPSTR',
    0x001f => 'VT_LPWSTR',
    0x0040 => 'VT_FILETIME',
    0x0041 => 'VT_BLOB',
    0x0042 => 'VT_STREAM',
    0x0043 => 'VT_STORAGE',
    0x0044 => 'VT_STREAMED_OBJECT',
    0x0045 => 'VT_STORED_OBJECT',
    0x0046 => 'VT_BLOB_OBJECT',
    0x0047 => 'VT_CF',
    0x0048 => 'VT_CLSID',
    # 0x0fff is shared by VT_ILLEGALMASKED and VT_TYPEMASK. a hash can only
    # hold one name per number, so the name that previously won (the later
    # entry) is kept here, and VT_ILLEGALMASKED is defined in Constants.
    0x0fff => 'VT_TYPEMASK',
    0x1000 => 'VT_VECTOR',
    0x2000 => 'VT_ARRAY',
    0x4000 => 'VT_BYREF',
    0x8000 => 'VT_RESERVED',
    0xffff => 'VT_ILLEGAL'
  }

  # symbolic name => class doing the (de)serialization. Data is used for
  # every type not listed.
  CLASS_MAP = {
    # haven't seen one of these. wonder if its same as FILETIME?
    #'VT_DATE' => ?,
    'VT_LPSTR' => Lpstr,
    'VT_LPWSTR' => Lpwstr,
    'VT_FILETIME' => FileTime,
    'VT_CLSID' => Clsid
  }

  # One constant per entry of NAMES, eg VT_DATE == 7.
  module Constants
    NAMES.each { |num, name| const_set name, num }
    # lost from NAMES because it shares its number with VT_TYPEMASK
    VT_ILLEGALMASKED = 0x0fff
  end

  # Deserialize +str+ as the variant type numbered +type+.
  # Raises ArgumentError for a type number that is not in NAMES.
  def self.load type, str
    (CLASS_MAP[name_for(type)] || Data).load str
  end

  # Serialize +variant+ as the variant type numbered +type+.
  # Raises ArgumentError for a type number that is not in NAMES.
  def self.dump type, variant
    (CLASS_MAP[name_for(type)] || Data).dump variant
  end

  # Symbolic name for a type number. The number is kept in its own variable
  # so that it is still available for the error message.
  def self.name_for type
    NAMES[type] or raise ArgumentError,
      'unknown ole type - %s' % (Integer === type ? '0x%04x' % type : type.inspect)
  end
  private_class_method :name_for
end

include Variant::Constants

# deprecated aliases, kept mostly for the benefit of ruby-msg, until
# i release a new version.
- def self.load_guid str - Variant.load VT_CLSID, str - end - - def self.load_time str - Variant.load VT_FILETIME, str - end - - FROM_UTF16 = Lpwstr::FROM_UTF16 - TO_UTF16 = Lpwstr::TO_UTF16 - end -end - diff --git a/vendor/ruby-ole/lib/ole/types/property_set.rb b/vendor/ruby-ole/lib/ole/types/property_set.rb deleted file mode 100644 index b8d85acba..000000000 --- a/vendor/ruby-ole/lib/ole/types/property_set.rb +++ /dev/null @@ -1,165 +0,0 @@ -require 'ole/types' -require 'yaml' - -module Ole - module Types - # - # The PropertySet class currently supports readonly access to the properties - # serialized in "property set" streams, such as the file "\005SummaryInformation", - # in OLE files. - # - # Think it has its roots in MFC property set serialization. - # - # See http://poi.apache.org/hpsf/internals.html for details - # - class PropertySet - HEADER_SIZE = 28 - HEADER_PACK = "vvVa#{Clsid::SIZE}V" - OS_MAP = { - 0 => :win16, - 1 => :mac, - 2 => :win32, - 0x20001 => :ooffice, # open office on linux... - } - - # define a smattering of the property set guids. - DATA = YAML.load_file(File.dirname(__FILE__) + '/../../../data/propids.yaml'). 
- inject({}) { |hash, (key, value)| hash.update Clsid.parse(key) => value } - - # create an inverted map of names to guid/key pairs - PROPERTY_MAP = DATA.inject({}) do |h1, (guid, data)| - data[1].inject(h1) { |h2, (id, name)| h2.update name => [guid, id] } - end - - module Constants - DATA.each { |guid, (name, map)| const_set name, guid } - end - - include Constants - include Enumerable - - class Section - include Variant::Constants - include Enumerable - - SIZE = Clsid::SIZE + 4 - PACK = "a#{Clsid::SIZE}v" - - attr_accessor :guid, :offset - attr_reader :length - - def initialize str, property_set - @property_set = property_set - @guid, @offset = str.unpack PACK - self.guid = Clsid.load guid - load_header - end - - def io - @property_set.io - end - - def load_header - io.seek offset - @byte_size, @length = io.read(8).unpack 'V2' - end - - def [] key - each_raw do |id, property_offset| - return read_property(property_offset).last if key == id - end - nil - end - - def []= key, value - raise NotImplementedError, 'section writes not yet implemented' - end - - def each - each_raw do |id, property_offset| - yield id, read_property(property_offset).last - end - end - - private - - def each_raw - io.seek offset + 8 - io.read(length * 8).each_chunk(8) { |str| yield(*str.unpack('V2')) } - end - - def read_property property_offset - io.seek offset + property_offset - type, value = io.read(8).unpack('V2') - # is the method of serialization here custom? - case type - when VT_LPSTR, VT_LPWSTR - value = Variant.load type, io.read(value) - # .... - end - [type, value] - end - end - - attr_reader :io, :signature, :unknown, :os, :guid, :sections - - def initialize io - @io = io - load_header io.read(HEADER_SIZE) - load_section_list io.read(@num_sections * Section::SIZE) - # expect no gap between last section and start of data. 
- #Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min - end - - def load_header str - @signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_PACK - # should i check that unknown == 0? it usually is. so is the guid actually - @guid = Clsid.load @guid - @os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}") - end - - def load_section_list str - @sections = str.to_enum(:each_chunk, Section::SIZE).map { |s| Section.new s, self } - end - - def [] key - pair = PROPERTY_MAP[key.to_s] or return nil - section = @sections.find { |s| s.guid == pair.first } or return nil - section[pair.last] - end - - def []= key, value - pair = PROPERTY_MAP[key.to_s] or return nil - section = @sections.find { |s| s.guid == pair.first } or return nil - section[pair.last] = value - end - - def method_missing name, *args, &block - if name.to_s =~ /(.*)=$/ - return super unless args.length == 1 - return super unless PROPERTY_MAP[$1] - self[$1] = args.first - else - return super unless args.length == 0 - return super unless PROPERTY_MAP[name.to_s] - self[name] - end - end - - def each - @sections.each do |section| - next unless pair = DATA[section.guid] - map = pair.last - section.each do |id, value| - name = map[id] or next - yield name, value - end - end - end - - def to_h - inject({}) { |hash, (name, value)| hash.update name.to_sym => value } - end - end - end -end |