#
# = Introduction
#
# This file is mostly an attempt to port libpst to ruby, and simplify it in the process. It
# will leverage much of the existing MAPI => MIME conversion developed for Msg files, and as
# such is purely concerned with the file structure details.
#
# = TODO
# 
# 1. solve recipient table problem (test4).
#    this is done. turns out it was due to id2 clashes. find better solution
# 2. check parse consistency. an initial conversion of a 30M file to pst, shows
#    a number of messages conveting badly. compare with libpst too.
# 3. xattribs
# 4. generalise the Mapi stuff better
# 5. refactor index load
# 6. msg serialization?
#

=begin

quick plan for cleanup.

have working tests for 97 and 03 file formats, so safe.

want to fix up:

64 bit unpacks scattered around. its ugly. not sure how best to handle it, but am slightly tempted
to override String#unpack to support a 64 bit little endian unpack (like L vs N/V, for Q). one way or
another need to fix it. Could really slow everything else down if its parsing the unpack strings twice,
once in ruby, for every single unpack i do :/

the index loading process, and the lack of shared code between normal vs 64 bit variants, and Index vs Desc.
should be able to reduce code by factor of 4. also think I should move load code into the class too. then
maybe have something like:

class Header
	def index_class
		version_2003 ? Index64 : Index
	end
end

def load_idx
	header.index_class.load_index
end

OR

def initialize
	@header = ...
	extend @header.index_class::Load
	load_idx
end

need to think about the role of the mapi code, and Pst::Item etc, but that layer can come later.

=end

require 'mapi'
require 'enumerator'
require 'ostruct'
require 'ole/ranges_io'

module Mapi
class Pst
	class FormatError < StandardError
	end

	# unfortunately there is no Q analogue which is little endian only.
	# this translates T as an unsigned quad word, little endian byte order, to
	# not pollute the rest of the code.
	#
	# didn't want to override String#unpack, cause its too hacky, and incomplete.
	def self.unpack str, unpack_spec
		return str.unpack(unpack_spec) unless unpack_spec['T']
		@unpack_cache ||= {}
		t_offsets, new_spec = @unpack_cache[unpack_spec]
		unless t_offsets
			t_offsets = []
			offset = 0
			new_spec = ''
			unpack_spec.scan(/([^\d])_?(\*|\d+)?/o) do
				num_elems = $1.downcase == 'a' ? 1 : ($2 || 1).to_i
				if $1 == 'T'
					num_elems.times { |i| t_offsets << offset + i }
					new_spec << "V#{num_elems * 2}"
				else
					new_spec << $~[0]
				end
				offset += num_elems
			end
			@unpack_cache[unpack_spec] = [t_offsets, new_spec]
		end
		a = str.unpack(new_spec)
		t_offsets.each do |offset|
			low, high = a[offset, 2]
			a[offset, 2] = low && high ? low + (high << 32) : nil
		end
		a
	end

	#
	# this is the header and encryption encapsulation code
	# ----------------------------------------------------------------------------
	#

	# class which encapsulates the pst header
	class Header
		SIZE = 512
		MAGIC = 0x2142444e

		# these are the constants defined in libpst.c, that
		# are referenced in pst_open()
		INDEX_TYPE_OFFSET = 0x0A
		FILE_SIZE_POINTER = 0xA8
		FILE_SIZE_POINTER_64 = 0xB8
		SECOND_POINTER = 0xBC
		INDEX_POINTER = 0xC4
		SECOND_POINTER_64 = 0xE0
		INDEX_POINTER_64 = 0xF0
		ENC_OFFSET = 0x1CD

		attr_reader :magic, :index_type, :encrypt_type, :size
		attr_reader :index1_count, :index1, :index2_count, :index2
		attr_reader :version
		def initialize data
			@magic = data.unpack('N')[0]
			@index_type = data[INDEX_TYPE_OFFSET]
			@version = {0x0e => 1997, 0x17 => 2003}[@index_type]

			if version_2003?
				# don't know?
				# >> data1.unpack('V*').zip(data2.unpack('V*')).enum_with_index.select { |(c, d), i| c != d and not [46, 56, 60].include?(i) }.select { |(a, b), i| b == 0 }.map { |(a, b), i| [a / 256, i] }
				#   [8, 76], [32768, 84], [128, 89]
				# >> data1.unpack('C*').zip(data2.unpack('C*')).enum_with_index.select { |(c, d), i| c != d and not [184..187, 224..227, 240..243].any? { |r| r === i } }.select { |(a, b), i| b == 0 and ((Math.log(a) / Math.log(2)) % 1) < 0.0001 }
				#   [[[2, 0], 61], [[2, 0], 76], [[2, 0], 195], [[2, 0], 257], [[8, 0], 305], [[128, 0], 338], [[128, 0], 357]]
				# i have only 2 psts to base this guess on, so i can't really come up with anything that looks reasonable yet. not sure what the offset is. unfortunately there is so much in the header
				# that isn't understood...
				@encrypt_type = 1

				@index2_count, @index2 = data[SECOND_POINTER_64 - 4, 8].unpack('V2')
				@index1_count, @index1 = data[INDEX_POINTER_64  - 4, 8].unpack('V2')

				@size = data[FILE_SIZE_POINTER_64, 4].unpack('V')[0]
			else
				@encrypt_type = data[ENC_OFFSET]

				@index2_count, @index2 = data[SECOND_POINTER - 4, 8].unpack('V2')
				@index1_count, @index1 = data[INDEX_POINTER  - 4, 8].unpack('V2')

				@size = data[FILE_SIZE_POINTER, 4].unpack('V')[0]
			end

			validate!
		end

		def version_2003?
			version == 2003
		end

		def encrypted?
			encrypt_type != 0
		end

		def validate!
			raise FormatError, "bad signature on pst file (#{'0x%x' % magic})" unless magic == MAGIC
			raise FormatError, "only index types 0x0e and 0x17 are handled (#{'0x%x' % index_type})" unless [0x0e, 0x17].include?(index_type)
			raise FormatError, "only encrytion types 0 and 1 are handled (#{encrypt_type.inspect})" unless [0, 1].include?(encrypt_type)
		end
	end

	# compressible encryption! :D
	#
	# simple substitution. see libpst.c
	# maybe test switch to using a String#tr!
	class CompressibleEncryption
		DECRYPT_TABLE = [
			0x47, 0xf1, 0xb4, 0xe6, 0x0b, 0x6a, 0x72, 0x48,
			0x85, 0x4e, 0x9e, 0xeb, 0xe2, 0xf8, 0x94, 0x53, # 0x0f
			0xe0, 0xbb, 0xa0, 0x02, 0xe8, 0x5a, 0x09, 0xab,
			0xdb, 0xe3, 0xba, 0xc6, 0x7c, 0xc3, 0x10, 0xdd, # 0x1f
			0x39, 0x05, 0x96, 0x30, 0xf5, 0x37, 0x60, 0x82,
			0x8c, 0xc9, 0x13, 0x4a, 0x6b, 0x1d, 0xf3, 0xfb, # 0x2f
			0x8f, 0x26, 0x97, 0xca, 0x91, 0x17, 0x01, 0xc4,
			0x32, 0x2d, 0x6e, 0x31, 0x95, 0xff, 0xd9, 0x23, # 0x3f
			0xd1, 0x00, 0x5e, 0x79, 0xdc, 0x44, 0x3b, 0x1a,
			0x28, 0xc5, 0x61, 0x57, 0x20, 0x90, 0x3d, 0x83, # 0x4f
			0xb9, 0x43, 0xbe, 0x67, 0xd2, 0x46, 0x42, 0x76,
			0xc0, 0x6d, 0x5b, 0x7e, 0xb2, 0x0f, 0x16, 0x29, # 0x5f
			0x3c, 0xa9, 0x03, 0x54, 0x0d, 0xda, 0x5d, 0xdf,
			0xf6, 0xb7, 0xc7, 0x62, 0xcd, 0x8d, 0x06, 0xd3, # 0x6f
			0x69, 0x5c, 0x86, 0xd6, 0x14, 0xf7, 0xa5, 0x66,
			0x75, 0xac, 0xb1, 0xe9, 0x45, 0x21, 0x70, 0x0c, # 0x7f
			0x87, 0x9f, 0x74, 0xa4, 0x22, 0x4c, 0x6f, 0xbf,
			0x1f, 0x56, 0xaa, 0x2e, 0xb3, 0x78, 0x33, 0x50, # 0x8f
			0xb0, 0xa3, 0x92, 0xbc, 0xcf, 0x19, 0x1c, 0xa7,
			0x63, 0xcb, 0x1e, 0x4d, 0x3e, 0x4b, 0x1b, 0x9b, # 0x9f
			0x4f, 0xe7, 0xf0, 0xee, 0xad, 0x3a, 0xb5, 0x59,
			0x04, 0xea, 0x40, 0x55, 0x25, 0x51, 0xe5, 0x7a, # 0xaf
			0x89, 0x38, 0x68, 0x52, 0x7b, 0xfc, 0x27, 0xae,
			0xd7, 0xbd, 0xfa, 0x07, 0xf4, 0xcc, 0x8e, 0x5f, # 0xbf
			0xef, 0x35, 0x9c, 0x84, 0x2b, 0x15, 0xd5, 0x77,
			0x34, 0x49, 0xb6, 0x12, 0x0a, 0x7f, 0x71, 0x88, # 0xcf
			0xfd, 0x9d, 0x18, 0x41, 0x7d, 0x93, 0xd8, 0x58,
			0x2c, 0xce, 0xfe, 0x24, 0xaf, 0xde, 0xb8, 0x36, # 0xdf
			0xc8, 0xa1, 0x80, 0xa6, 0x99, 0x98, 0xa8, 0x2f,
			0x0e, 0x81, 0x65, 0x73, 0xe4, 0xc2, 0xa2, 0x8a, # 0xef
			0xd4, 0xe1, 0x11, 0xd0, 0x08, 0x8b, 0x2a, 0xf2,
			0xed, 0x9a, 0x64, 0x3f, 0xc1, 0x6c, 0xf9, 0xec  # 0xff
		]

		ENCRYPT_TABLE = [nil] * 256
		DECRYPT_TABLE.each_with_index { |i, j| ENCRYPT_TABLE[i] = j }

		def self.decrypt_alt encrypted
			decrypted = ''
			encrypted.length.times { |i| decrypted << DECRYPT_TABLE[encrypted[i]] }
			decrypted
		end

		def self.encrypt_alt decrypted
			encrypted = ''
			decrypted.length.times { |i| encrypted << ENCRYPT_TABLE[decrypted[i]] }
			encrypted
		end

		# an alternate implementation that is possibly faster....
		# TODO - bench
		DECRYPT_STR, ENCRYPT_STR = [DECRYPT_TABLE, (0...256)].map do |values|
			values.map { |i| i.chr }.join.gsub(/([\^\-\\])/, "\\\\\\1")
		end

		def self.decrypt encrypted
			encrypted.tr ENCRYPT_STR, DECRYPT_STR
		end

		def self.encrypt decrypted
			decrypted.tr DECRYPT_STR, ENCRYPT_STR
		end
	end

	class RangesIOEncryptable < RangesIO
		def initialize io, mode='r', params={}
			mode, params = 'r', mode if Hash === mode
			@decrypt = !!params[:decrypt]
			super
		end

		def encrypted?
			@decrypt
		end

		def read limit=nil
			buf = super
			buf = CompressibleEncryption.decrypt(buf) if encrypted?
			buf
		end
	end

	attr_reader :io, :header, :idx, :desc, :special_folder_ids

	# corresponds to
	# * pst_open
	# * pst_load_index
	def initialize io
		@io = io
		io.pos = 0
		@header = Header.new io.read(Header::SIZE)

		# would prefer this to be in Header#validate, but it doesn't have the io size.
		# should perhaps downgrade this to just be a warning...
		raise FormatError, "header size field invalid (#{header.size} != #{io.size}}" unless header.size == io.size

		load_idx
		load_desc
		load_xattrib

		@special_folder_ids = {}
	end

	def encrypted?
		@header.encrypted?
	end

	# until i properly fix logging...
	def warn s
		Mapi::Log.warn s
	end

	#
	# this is the index and desc record loading code
	# ----------------------------------------------------------------------------
	#

	ToTree = Module.new

	module Index2
		BLOCK_SIZE = 512
		module RecursiveLoad
			def load_chain
				#...
			end
		end

		module Base
			def read
				#...
			end
		end

		class Version1997 < Struct.new(:a)#...)
			SIZE = 12

			include RecursiveLoad
			include Base
		end

		class Version2003 < Struct.new(:a)#...)
			SIZE = 24

			include RecursiveLoad
			include Base
		end
	end

	module Desc2
		module Base
			def desc
				#...
			end
		end

		class Version1997 < Struct.new(:a)#...)
			#include Index::RecursiveLoad
			include Base
		end

		class Version2003 < Struct.new(:a)#...)
			#include Index::RecursiveLoad
			include Base
		end
	end

	# more constants from libpst.c
	# these relate to the index block
	ITEM_COUNT_OFFSET = 0x1f0 # count byte
	LEVEL_INDICATOR_OFFSET = 0x1f3 # node or leaf
	BACKLINK_OFFSET = 0x1f8 # backlink u1 value

	# these 3 classes are used to hold various file records

	# pst_index
	class Index < Struct.new(:id, :offset, :size, :u1)
		UNPACK_STR = 'VVvv'
		SIZE = 12
		BLOCK_SIZE = 512 # index blocks was 516 but bogus
		COUNT_MAX = 41 # max active items (ITEM_COUNT_OFFSET / Index::SIZE = 41)

		attr_accessor :pst
		def initialize data
			data = Pst.unpack data, UNPACK_STR if String === data
			super(*data)
		end

		def type
			@type ||= begin
				if id & 0x2 == 0
					:data
				else
					first_byte, second_byte = read.unpack('CC')
					if first_byte == 1
						raise second_byte unless second_byte == 1
						:data_chain_header
					elsif first_byte == 2
						raise second_byte unless second_byte == 0
						:id2_assoc
					else
						raise FormatError, 'unknown first byte for block - %p' % first_byte
					end
				end
			end
		end

		def data?
			(id & 0x2) == 0
		end

		def read decrypt=true
			# only data blocks are every encrypted
			decrypt = false unless data?
			pst.pst_read_block_size offset, size, decrypt
		end

		# show all numbers in hex
		def inspect
			super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }.sub(/Index /, "Index type=#{type.inspect}, ")
		end
	end

	# mostly guesses.
	ITEM_COUNT_OFFSET_64 = 0x1e8
	LEVEL_INDICATOR_OFFSET_64 = 0x1eb # diff of 3 between these 2 as above...

	# will maybe inherit from Index64, in order to get the same #type function.
	class Index64 < Index
		UNPACK_STR = 'TTvvV'
		SIZE = 24
		BLOCK_SIZE = 512
		COUNT_MAX = 20 # bit of a guess really. 512 / 24 = 21, but doesn't leave enough header room

		# this is the extra item on the end of the UNPACK_STR above
		attr_accessor :u2

		def initialize data
			data = Pst.unpack data, UNPACK_STR if String === data
			@u2 = data.pop
			super data
		end

		def inspect
			super.sub(/>$/, ', u2=%p>' % u2)
		end

		def self.load_chain io, header
			load_idx_rec io, header.index1, 0, 0
		end

		# almost identical to load code for Index, just different offsets and unpack strings.
		# can probably merge them, or write a generic load_tree function or something.
		def self.load_idx_rec io, offset, linku1, start_val
			io.seek offset
			buf = io.read BLOCK_SIZE
			idxs = []

			item_count = buf[ITEM_COUNT_OFFSET_64]
			raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX

			#idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
			#raise 'blah 1' unless idx.id == linku1

			if buf[LEVEL_INDICATOR_OFFSET_64] == 0
				# leaf pointers
				# split the data into item_count index objects
				buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
					idx = new data
					# first entry
					raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
					#idx.pst = self
					break if idx.id == 0
					idxs << idx
				end
			else
				# node pointers
				# split the data into item_count table pointers
				buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
					start, u1, offset = Pst.unpack data, 'T3'
					# for the first value, we expect the start to be equal
					raise 'blah 3' if i == 0 and start_val != 0 and start != start_val
					break if start == 0
					idxs += load_idx_rec io, offset, u1, start
				end
			end

			idxs
		end
	end

	# pst_desc
	class Desc64 < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id, :u2)
		UNPACK_STR = 'T3VV'
		SIZE = 32
		BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
		COUNT_MAX = 15 # guess as per Index64

		include RecursivelyEnumerable

		attr_accessor :pst
		attr_reader :children
		def initialize data
			super(*Pst.unpack(data, UNPACK_STR))
			@children = []
		end

		def desc
			pst.idx_from_id idx_id
		end

		def list_index
			pst.idx_from_id idx2_id
		end

		def self.load_chain io, header
			load_desc_rec io, header.index2, 0, 0x21
		end

		def self.load_desc_rec io, offset, linku1, start_val
			io.seek offset
			buf = io.read BLOCK_SIZE
			descs = []
			item_count = buf[ITEM_COUNT_OFFSET_64]

			# not real desc
			#desc = Desc.new buf[BACKLINK_OFFSET, 4]
			#raise 'blah 1' unless desc.desc_id == linku1

			if buf[LEVEL_INDICATOR_OFFSET_64] == 0
				# leaf pointers
				raise "have too many active items in index (#{item_count})" if item_count > COUNT_MAX
				# split the data into item_count desc objects
				buf[0, SIZE * item_count].scan(/.{#{SIZE}}/mo).each_with_index do |data, i|
					desc = new data
					# first entry
					raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
					break if desc.desc_id == 0
					descs << desc
				end
			else
				# node pointers
				raise "have too many active items in index (#{item_count})" if item_count > Index64::COUNT_MAX
				# split the data into item_count table pointers
				buf[0, Index64::SIZE * item_count].scan(/.{#{Index64::SIZE}}/mo).each_with_index do |data, i|
					start, u1, offset = Pst.unpack data, 'T3'
					# for the first value, we expect the start to be equal note that ids -1, so even for the
					# first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
					# that the first desc record is always 33...
					# thats because 0x21 is the pst root itself...
					raise 'blah 3' if i == 0 and start_val != -1 and start != start_val
					# this shouldn't really happen i'd imagine
					break if start == 0
					descs += load_desc_rec io, offset, u1, start
				end
			end

			descs
		end

		def each_child(&block)
			@children.each(&block)
		end
	end

	# _pst_table_ptr_struct
	class TablePtr < Struct.new(:start, :u1, :offset)
		UNPACK_STR = 'V3'
		SIZE = 12

		def initialize data
			data = data.unpack(UNPACK_STR) if String === data
			super(*data)
		end
	end

	# pst_desc
	# idx_id is a pointer to an idx record which gets the primary data stream for the Desc record.
	# idx2_id gets you an idx record, that when read gives you an ID2 association list, which just maps
	# another set of ids to index values
	class Desc < Struct.new(:desc_id, :idx_id, :idx2_id, :parent_desc_id)
		UNPACK_STR = 'V4'
		SIZE = 16
		BLOCK_SIZE = 512 # descriptor blocks was 520 but bogus
		COUNT_MAX = 31 # max active desc records (ITEM_COUNT_OFFSET / Desc::SIZE = 31)

		include ToTree

		attr_accessor :pst
		attr_reader :children
		def initialize data
			super(*data.unpack(UNPACK_STR))
			@children = []
		end

		def desc
			pst.idx_from_id idx_id
		end

		def list_index
			pst.idx_from_id idx2_id
		end

		# show all numbers in hex
		def inspect
			super.gsub(/=(\d+)/) { '=0x%x' % $1.to_i }
		end
	end

	# corresponds to
	# * _pst_build_id_ptr
	def load_idx
		@idx = []
		@idx_offsets = []
		if header.version_2003?
			@idx = Index64.load_chain io, header
			@idx.each { |idx| idx.pst = self }
		else
			load_idx_rec header.index1, header.index1_count, 0
		end

		# we'll typically be accessing by id, so create a hash as a lookup cache
		@idx_from_id = {}
 		@idx.each do |idx|
			warn "there are duplicate idx records with id #{idx.id}" if @idx_from_id[idx.id]
			@idx_from_id[idx.id] = idx
		end
	end

	# load the flat idx table, which maps ids to file ranges. this is the recursive helper
	#
	# corresponds to
	# * _pst_build_id_ptr
	def load_idx_rec offset, linku1, start_val
		@idx_offsets << offset

		#_pst_read_block_size(pf, offset, BLOCK_SIZE, &buf, 0, 0) < BLOCK_SIZE)
		buf = pst_read_block_size offset, Index::BLOCK_SIZE, false

		item_count = buf[ITEM_COUNT_OFFSET]
		raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX

		idx = Index.new buf[BACKLINK_OFFSET, Index::SIZE]
		raise 'blah 1' unless idx.id == linku1

		if buf[LEVEL_INDICATOR_OFFSET] == 0
			# leaf pointers
			# split the data into item_count index objects
			buf[0, Index::SIZE * item_count].scan(/.{#{Index::SIZE}}/mo).each_with_index do |data, i|
				idx = Index.new data
				# first entry
				raise 'blah 3' if i == 0 and start_val != 0 and idx.id != start_val
				idx.pst = self
				# this shouldn't really happen i'd imagine
				break if idx.id == 0
				@idx << idx
			end
		else
			# node pointers
			# split the data into item_count table pointers
			buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
				table = TablePtr.new data
				# for the first value, we expect the start to be equal
				raise 'blah 3' if i == 0 and start_val != 0 and table.start != start_val
				# this shouldn't really happen i'd imagine
				break if table.start == 0
				load_idx_rec table.offset, table.u1, table.start
			end
		end
	end

	# most access to idx objects will use this function
	#
	# corresponds to
	# * _pst_getID
	def idx_from_id id
		@idx_from_id[id]
	end

	# corresponds to
	# * _pst_build_desc_ptr
	# * record_descriptor
	def load_desc
		@desc = []
		@desc_offsets = []
		if header.version_2003?
			@desc = Desc64.load_chain io, header
			@desc.each { |desc| desc.pst = self }
		else
			load_desc_rec header.index2, header.index2_count, 0x21
		end

		# first create a lookup cache
		@desc_from_id = {}
 		@desc.each do |desc|
			desc.pst = self
			warn "there are duplicate desc records with id #{desc.desc_id}" if @desc_from_id[desc.desc_id]
			@desc_from_id[desc.desc_id] = desc
		end

		# now turn the flat list of loaded desc records into a tree

		# well, they have no parent, so they're more like, the toplevel descs.
		@orphans = []
		# now assign each node to the parents child array, putting the orphans in the above
		@desc.each do |desc|
			parent = @desc_from_id[desc.parent_desc_id]
			# note, besides this, its possible to create other circular structures.
			if parent == desc
				# this actually happens usually, for the root_item it appears.
				#warn "desc record's parent is itself (#{desc.inspect})"
			# maybe add some more checks in here for circular structures
			elsif parent
				parent.children << desc
				next
			end
			@orphans << desc
		end

		# maybe change this to some sort of sane-ness check. orphans are expected
#		warn "have #{@orphans.length} orphan desc record(s)." unless @orphans.empty?
	end

	# load the flat list of desc records recursively
	#
	# corresponds to
	# * _pst_build_desc_ptr
	# * record_descriptor
	def load_desc_rec offset, linku1, start_val
		@desc_offsets << offset
		
		buf = pst_read_block_size offset, Desc::BLOCK_SIZE, false
		item_count = buf[ITEM_COUNT_OFFSET]

		# not real desc
		desc = Desc.new buf[BACKLINK_OFFSET, 4]
		raise 'blah 1' unless desc.desc_id == linku1

		if buf[LEVEL_INDICATOR_OFFSET] == 0
			# leaf pointers
			raise "have too many active items in index (#{item_count})" if item_count > Desc::COUNT_MAX
			# split the data into item_count desc objects
			buf[0, Desc::SIZE * item_count].scan(/.{#{Desc::SIZE}}/mo).each_with_index do |data, i|
				desc = Desc.new data
				# first entry
				raise 'blah 3' if i == 0 and start_val != 0 and desc.desc_id != start_val
				# this shouldn't really happen i'd imagine
				break if desc.desc_id == 0
				@desc << desc
			end
		else
			# node pointers
			raise "have too many active items in index (#{item_count})" if item_count > Index::COUNT_MAX
			# split the data into item_count table pointers
			buf[0, TablePtr::SIZE * item_count].scan(/.{#{TablePtr::SIZE}}/mo).each_with_index do |data, i|
				table = TablePtr.new data
				# for the first value, we expect the start to be equal note that ids -1, so even for the
				# first we expect it to be equal. thats the 0x21 (dec 33) desc record. this means we assert
				# that the first desc record is always 33...
				raise 'blah 3' if i == 0 and start_val != -1 and table.start != start_val
				# this shouldn't really happen i'd imagine
				break if table.start == 0
				load_desc_rec table.offset, table.u1, table.start
			end
		end
	end

	# as for idx
	# 
	# corresponds to:
	# * _pst_getDptr
	def desc_from_id id
		@desc_from_id[id]
	end

	# corresponds to
	# * pst_load_extended_attributes
	def load_xattrib
		unless desc = desc_from_id(0x61)
			warn "no extended attributes desc record found"
			return
		end
		unless desc.desc
			warn "no desc idx for extended attributes"
			return
		end
		if desc.list_index
		end
		#warn "skipping loading xattribs"
		# FIXME implement loading xattribs
	end

	# corresponds to:
	# * _pst_read_block_size
	# * _pst_read_block ??
	# * _pst_ff_getIDblock_dec ??
	# * _pst_ff_getIDblock ??
	def pst_read_block_size offset, size, decrypt=true
		io.seek offset
		buf = io.read size
		warn "tried to read #{size} bytes but only got #{buf.length}" if buf.length != size
		encrypted? && decrypt ? CompressibleEncryption.decrypt(buf) : buf
	end

	#
	# id2 
	# ----------------------------------------------------------------------------
	#

	class ID2Assoc < Struct.new(:id2, :id, :table2)
		UNPACK_STR = 'V3'
		SIZE = 12

		def initialize data
			data = data.unpack(UNPACK_STR) if String === data
			super(*data)
		end
	end

	class ID2Assoc64 < Struct.new(:id2, :u1, :id, :table2)
		UNPACK_STR = 'VVT2'
		SIZE = 24

		def initialize data
			if String === data
				data = Pst.unpack data, UNPACK_STR
			end
			super(*data)
		end

		def self.load_chain idx
			buf = idx.read
			type, count = buf.unpack 'v2'
			unless type == 0x0002
				raise 'unknown id2 type 0x%04x' % type
				#return
			end
			id2 = []
			count.times do |i|
				assoc = new buf[8 + SIZE * i, SIZE]
				id2 << assoc
				if assoc.table2 != 0
					id2 += load_chain idx.pst.idx_from_id(assoc.table2)
				end
			end
			id2
		end
	end

	class ID2Mapping
		attr_reader :list
		def initialize pst, list
			@pst = pst
			@list = list
			# create a lookup. 
			@id_from_id2 = {}
			@list.each do |id2|
				# NOTE we take the last value seen value if there are duplicates. this "fixes"
				# test4-o1997.pst for the time being.
				warn "there are duplicate id2 records with id #{id2.id2}" if @id_from_id2[id2.id2]
				next if @id_from_id2[id2.id2]
				@id_from_id2[id2.id2] = id2.id
			end
		end

		# TODO: fix logging
		def warn s
			Mapi::Log.warn s
		end

		# corresponds to:
		# * _pst_getID2
		def [] id
			#id2 = @list.find { |x| x.id2 == id }
			id = @id_from_id2[id]
			id and @pst.idx_from_id(id)
		end
	end

	def load_idx2 idx
		if header.version_2003?
			id2 = ID2Assoc64.load_chain idx
		else
			id2 = load_idx2_rec idx
		end
		ID2Mapping.new self, id2
	end

	# corresponds to
	# * _pst_build_id2
	def load_idx2_rec idx
		# i should perhaps use a idx chain style read here?
		buf = pst_read_block_size idx.offset, idx.size, false
		type, count = buf.unpack 'v2'
		unless type == 0x0002
			raise 'unknown id2 type 0x%04x' % type
			#return
		end
		id2 = []
		count.times do |i|
			assoc = ID2Assoc.new buf[4 + ID2Assoc::SIZE * i, ID2Assoc::SIZE]
			id2 << assoc
			if assoc.table2 != 0
				id2 += load_idx2_rec idx_from_id(assoc.table2)
			end
		end
		id2
	end

	class RangesIOIdxChain < RangesIOEncryptable
		def initialize pst, idx_head
			@idxs = pst.id2_block_idx_chain idx_head
			# whether or not a given idx needs encrypting
			decrypts = @idxs.map do |idx|
				decrypt = (idx.id & 2) != 0 ? false : pst.encrypted?
			end.uniq
			raise NotImplementedError, 'partial encryption in RangesIOID2' if decrypts.length > 1
			decrypt = decrypts.first
			# convert idxs to ranges
			ranges = @idxs.map { |idx| [idx.offset, idx.size] }
			super pst.io, :ranges => ranges, :decrypt => decrypt
		end
	end

	class RangesIOID2 < RangesIOIdxChain
		def self.new pst, id2, idx2
			RangesIOIdxChain.new pst, idx2[id2]
		end
	end

	# corresponds to:
	# * _pst_ff_getID2block
	# * _pst_ff_getID2data
	# * _pst_ff_compile_ID
	def id2_block_idx_chain idx
		if (idx.id & 0x2) == 0
			[idx]
		else
			buf = idx.read
			type, fdepth, count = buf[0, 4].unpack 'CCv'
			unless type == 1 # libpst.c:3958
				warn 'Error in idx_chain - %p, %p, %p - attempting to ignore' % [type, fdepth, count]
				return [idx]
			end
			# there are 4 unaccounted for bytes here, 4...8
			if header.version_2003?
				ids = buf[8, count * 8].unpack("T#{count}")
			else
				ids = buf[8, count * 4].unpack('V*')
			end
			if fdepth == 1
				ids.map { |id| idx_from_id id }
			else
				ids.map { |id| id2_block_idx_chain idx_from_id(id) }.flatten
			end
		end
	end

	#
	# main block parsing code. gets raw properties
	# ----------------------------------------------------------------------------
	#

	# the job of this class, is to take a desc record, and be able to enumerate through the
	# mapi properties of the associated thing.
	#
	# corresponds to
	# * _pst_parse_block
	# * _pst_process (in some ways. although perhaps thats more the Item::Properties#add_property)
	class BlockParser
		include Mapi::Types::Constants

		TYPES = {
			0xbcec => 1,
			0x7cec => 2,
			# type 3 is removed. an artifact of not handling the indirect blocks properly in libpst.
		}

		PR_SUBJECT = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_SUBJECT' }.first.hex
		PR_BODY_HTML = PropertySet::TAGS.find { |num, (name, type)| name == 'PR_BODY_HTML' }.first.hex

		# this stuff could maybe be moved to Ole::Types? or leverage it somehow?
		# whether or not a type is immeidate is more a property of the pst encoding though i expect.
		# what i probably can add is a generic concept of whether a type is of variadic length or not.

		# these lists are very incomplete. think they are largely copied from libpst

		IMMEDIATE_TYPES = [
			PT_SHORT, PT_LONG, PT_BOOLEAN
		]

		INDIRECT_TYPES = [
			PT_DOUBLE, PT_OBJECT,
			0x0014, # whats this? probably something like PT_LONGLONG, given the correspondence with the
							# ole variant types. (= VT_I8)
			PT_STRING8, PT_UNICODE, # unicode isn't in libpst, but added here for outlook 2003 down the track
			PT_SYSTIME,
			0x0048, # another unknown
			0x0102, # this is PT_BINARY vs PT_CLSID
			#0x1003, # these are vector types, but they're commented out for now because i'd expect that
			#0x1014, # there's extra decoding needed that i'm not doing. (probably just need a simple
			#        # PT_* => unpack string mapping for the immediate types, and just do unpack('V*') etc
			#0x101e,
			#0x1102
		]

		# the attachment and recipient arrays appear to be always stored with these fixed
		# id2 values. seems strange. are there other extra streams? can find out by making higher
		# level IO wrapper, which has the id2 value, and doing the diff of available id2 values versus
		# used id2 values in properties of an item.
		ID2_ATTACHMENTS = 0x671
		ID2_RECIPIENTS = 0x692

		attr_reader :desc, :data, :data_chunks, :offset_tables
		def initialize desc
			raise FormatError, "unable to get associated index record for #{desc.inspect}" unless desc.desc
			@desc = desc
			#@data = desc.desc.read
			if Pst::Index === desc.desc
				#@data = RangesIOIdxChain.new(desc.pst, desc.desc).read
				idxs = desc.pst.id2_block_idx_chain desc.desc
				# this gets me the plain index chain.
			else
				# fake desc
				#@data = desc.desc.read
				idxs = [desc.desc]
			end

			@data_chunks = idxs.map { |idx| idx.read }
			@data = @data_chunks.first

			load_header

			@index_offsets = [@index_offset] + @data_chunks[1..-1].map { |chunk| chunk.unpack('v')[0] }
			@offset_tables = []
			@ignored = []
			@data_chunks.zip(@index_offsets).each do |chunk, offset|
				ignore = chunk[offset, 2].unpack('v')[0]
				@ignored << ignore
#				p ignore
				@offset_tables.push offset_table = []
				# maybe its ok if there aren't to be any values ?
				raise FormatError if offset == 0
				offsets = chunk[offset + 2..-1].unpack('v*')
				#p offsets
				offsets[0, ignore + 2].each_cons 2 do |from, to|
					#next if to == 0
					raise FormatError, [from, to].inspect if from > to
					offset_table << [from, to]
				end
			end

			@offset_table = @offset_tables.first
			@idxs = idxs

			# now, we may have multiple different blocks
		end

		# a given desc record may or may not have associated idx2 data. we lazily load it here, so it will never
		# actually be requested unless get_data_indirect actually needs to use it.
		def idx2
			return @idx2 if @idx2
			raise FormatError, 'idx2 requested but no idx2 available' unless desc.list_index
			# should check this can't return nil
			@idx2 = desc.pst.load_idx2 desc.list_index
		end

		def load_header
			@index_offset, type, @offset1 = data.unpack 'vvV'
			raise FormatError, 'unknown block type signature 0x%04x' % type unless TYPES[type]
			@type = TYPES[type]
		end

		# based on the value of offset, return either some data from buf, or some data from the
		# id2 chain id2, where offset is some key into a lookup table that is stored as the id2
		# chain. i think i may need to create a BlockParser class that wraps up all this mess.
		#
		# corresponds to:
		# * _pst_getBlockOffsetPointer
		# * _pst_getBlockOffset
		def get_data_indirect offset
			return get_data_indirect_io(offset).read

			if offset == 0
				nil
			elsif (offset & 0xf) == 0xf
				RangesIOID2.new(desc.pst, offset, idx2).read
			else
				low, high = offset & 0xf, offset >> 4
				raise FormatError if low != 0 or (high & 0x1) != 0 or (high / 2) > @offset_table.length
				from, to = @offset_table[high / 2]
				data[from...to]
			end
		end

		def get_data_indirect_io offset
			if offset == 0
				nil
			elsif (offset & 0xf) == 0xf
				if idx2[offset]
					RangesIOID2.new desc.pst, offset, idx2
				else
					warn "tried to get idx2 record for #{offset} but failed"
					return StringIO.new('')
				end
			else
				low, high = offset & 0xf, offset >> 4
				if low != 0 or (high & 0x1) != 0
#				raise FormatError, 
					warn "bad - #{low} #{high} (1)" 
					return StringIO.new('')
				end
				# lets see which block it should come from.
				block_idx, i = high.divmod 4096
				unless block_idx < @data_chunks.length
					warn "bad - block_idx to high (not #{block_idx} < #{@data_chunks.length})"
					return StringIO.new('')
				end
				data_chunk, offset_table = @data_chunks[block_idx], @offset_tables[block_idx]
				if i / 2 >= offset_table.length
					warn "bad - #{low} #{high} - #{i / 2} >= #{offset_table.length} (2)"
					return StringIO.new('')
				end
				#warn "ok  - #{low} #{high} #{offset_table.length}"
				from, to = offset_table[i / 2]
				StringIO.new data_chunk[from...to]
			end
		end

		def handle_indirect_values key, type, value
			case type
			when PT_BOOLEAN
				value = value != 0
			when *IMMEDIATE_TYPES # not including PT_BOOLEAN which we just did above
				# no processing current applied (needed?).
			when *INDIRECT_TYPES
				# the value is a pointer
				if String === value # ie, value size > 4 above
					value = StringIO.new value
				else
					value = get_data_indirect_io(value)
				end
				# keep strings as immediate values for now, for compatability with how i set up
				# Msg::Properties::ENCODINGS
				if value
					if type == PT_STRING8
						value = value.read
					elsif type == PT_UNICODE
						value = Ole::Types::FROM_UTF16.iconv value.read
					end
				end
				# special subject handling
				if key == PR_BODY_HTML and value
					# to keep the msg code happy, which thinks body_html will be an io
					# although, in 2003 version, they are 0102 already
					value = StringIO.new value unless value.respond_to?(:read)
				end
				if key == PR_SUBJECT and value
					ignore, offset = value.unpack 'C2'
					offset = (offset == 1 ? nil : offset - 3)
					value = value[2..-1]
=begin
					index = value =~ /^[A-Z]*:/ ? $~[0].length - 1 : nil
					unless ignore == 1 and offset == index
						warn 'something wrong with subject hack' 
						$x = [ignore, offset, value]
						require 'irb'
						IRB.start
						exit
					end
=end
=begin
new idea:

making sense of the \001\00[156] i've seen prefixing subject. i think its to do with the placement
of the ':', or the ' '. And perhaps an optimization to do with thread topic, and ignoring the prefixes
added by mailers. thread topic is equal to subject with all that crap removed.

can test by creating some mails with bizarre subjects.

subject="\001\005RE: blah blah"
subject="\001\001blah blah"
subject="\001\032Out of Office AutoReply: blah blah"
subject="\001\020Undeliverable: blah blah"

looks like it

=end

					# now what i think, is that perhaps, value[offset..-1] ...
					# or something like that should be stored as a special tag. ie, do a double yield
					# for this case. probably PR_CONVERSATION_TOPIC, in which case i'd write instead:
					# yield [PR_SUBJECT, ref_type, value]
					# yield [PR_CONVERSATION_TOPIC, ref_type, value[offset..-1]
					# next # to skip the yield.
				end

				# special handling for embedded objects
				# used for attach_data for attached messages. in which case attach_method should == 5,
				# for embedded object.
				if type == PT_OBJECT and value
					value = value.read if value.respond_to?(:read)
					id2, unknown = value.unpack 'V2'
					io = RangesIOID2.new desc.pst, id2, idx2

					# hacky
					desc2 = OpenStruct.new(:desc => io, :pst => desc.pst, :list_index => desc.list_index, :children => [])
					# put nil instead of desc.list_index, otherwise the attachment is attached to itself ad infinitum.
					# should try and fix that FIXME
					# this shouldn't be done always. for an attached message, yes, but for an attached
					# meta file, for example, it shouldn't. difference between embedded_ole vs embedded_msg
					# really.
					# note that in the case where its a embedded ole, you actually get a regular serialized ole
					# object, so i need to create an ole storage object on a rangesioidxchain!
					# eg:
=begin
att.props.display_name # => "Picture (Metafile)"
io = att.props.attach_data
io.read(32).unpack('H*') # => ["d0cf11e0a1b11ae100000.... note the docfile signature.
# plug some missing rangesio holes:
def io.rewind; seek 0; end
def io.flush; raise IOError; end
ole = Ole::Storage.open io
puts ole.root.to_tree

- #<Dirent:"Root Entry">
  |- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000...">
  |- #<Dirent:"CONTENTS" size=65696 data="\327\315\306\232\000...">
  \- #<Dirent:"\003MailStream" size=12 data="\001\000\000\000[...">
=end
					# until properly fixed, i have disabled this code here, so this will break
					# nested messages temporarily.
					#value = Item.new desc2, RawPropertyStore.new(desc2).to_a
					#desc2.list_index = nil
					value = io
				end
			# this is PT_MV_STRING8, i guess.
			# should probably have the 0x1000 flag, and do the or-ring.
			# example of 0x1102 is PR_OUTLOOK_2003_ENTRYIDS. less sure about that one.
			when 0x101e, 0x1102 
				# example data:
				# 0x802b "\003\000\000\000\020\000\000\000\030\000\000\000#\000\000\000BusinessCompetitionFavorites"
				# this 0x802b would be an extended attribute for categories / keywords.
				value = get_data_indirect_io(value).read unless String === value
				num = value.unpack('V')[0]
				offsets = value[4, 4 * num].unpack("V#{num}")
				value = (offsets + [value.length]).to_enum(:each_cons, 2).map { |from, to| value[from...to] }
				value.map! { |str| StringIO.new str } if type == 0x1102
			else
				name = Mapi::Types::DATA[type].first rescue nil
				warn '0x%04x %p' % [key, get_data_indirect_io(value).read]
				raise NotImplementedError, 'unsupported mapi property type - 0x%04x (%p)' % [type, name]
			end
			[key, type, value]
		end
	end

=begin
* recipients:

	affects: ["0x200764", "0x2011c4", "0x201b24", "0x201b44", "0x201ba4", "0x201c24", "0x201cc4", "0x202504"]

after adding the rawpropertystoretable fix, all except the second parse properly, and satisfy:

  item.props.display_to == item.recipients.map { |r| r.props.display_name if r.props.recipient_type == 1 }.compact * '; '

only the second still has a problem

#[#<struct Pst::Desc desc_id=0x2011c4, idx_id=0x397c, idx2_id=0x398a, parent_desc_id=0x8082>]

think this is related to a multi block #data3. ie, when you use @x * rec_size, and it
goes > 8190, or there abouts, then it stuffs up. probably there is header gunk, or something,
similar to when #data is multi block.

same problem affects the attachment table in test4. 

fixed that issue. round data3 ranges to rec_size. 

fix other issue with attached objects.

all recipients and attachments in test2 are fine.

only remaining issue is test4 recipients of 200044. strange.

=end

	# RawPropertyStore is used to iterate through the properties of an item, or the auxiliary
	# data for an attachment. its just a parser for the way the properties are serialized, when the
	# properties don't have to conform to a column structure.
	#
	# structure of this chunk of data is often
	#   header, property keys, data values, and then indexes.
	# the property keys has value in it. value can be the actual value if its a short type,
	# otherwise you lookup the value in the indicies, where you get the offsets to use in the
	# main data body. due to the indirect thing though, any of these parts could actually come
	# from a separate stream.
	class RawPropertyStore < BlockParser
		include Enumerable

		attr_reader :length
		def initialize desc
			super
			raise FormatError, "expected type 1 - got #{@type}" unless @type == 1

			# the way that offset works, data1 may be a subset of buf, or something from id2. if its from buf,
			# it will be offset based on index_offset and offset. so it could be some random chunk of data anywhere
			# in the thing.
			header_data = get_data_indirect @offset1
			raise FormatError if header_data.length < 8
			signature, offset2 = header_data.unpack 'V2'
			#p [@type, signature]
			raise FormatError, 'unhandled block signature 0x%08x' % @type if signature != 0x000602b5
			# this is actually a big chunk of tag tuples.
			@index_data = get_data_indirect offset2
			@length = @index_data.length / 8
		end

		# iterate through the property tuples
		def each
			length.times do |i|
				key, type, value = handle_indirect_values(*@index_data[8 * i, 8].unpack('vvV'))
				yield key, type, value
			end
		end
	end

	# RawPropertyStoreTable is kind of like a database table.
	# it has a fixed set of columns.
	# #[] is kind of like getting a row from the table.
	# those rows are currently encapsulated by Row, which has #each like
	# RawPropertyStore.
	# only used for the recipients array, and the attachments array. completely lazy, doesn't
	# load any of the properties upon creation. 
	class RawPropertyStoreTable < BlockParser
		class Column < Struct.new(:ref_type, :type, :ind2_off, :size, :slot)
			def initialize data
				super(*data.unpack('v3CC'))
			end

			def nice_type_name
				Mapi::Types::DATA[ref_type].first[/_(.*)/, 1].downcase rescue '0x%04x' % ref_type
			end

			def nice_prop_name
				Mapi::PropertyStore::TAGS['%04x' % type].first[/_(.*)/, 1].downcase rescue '0x%04x' % type
			end

			def inspect
				"#<#{self.class} name=#{nice_prop_name.inspect}, type=#{nice_type_name.inspect}>"
			end
		end

		include Enumerable

		attr_reader :length, :index_data, :data2, :data3, :rec_size
		def initialize desc
			super
			raise FormatError, "expected type 2 - got #{@type}" unless @type == 2

			header_data = get_data_indirect @offset1
			# seven_c_blk
			# often: u1 == u2 and u3 == u2 + 2, then rec_size == u3 + 4. wtf
			seven_c, @num_list, u1, u2, u3, @rec_size, b_five_offset,
				ind2_offset, u7, u8 = header_data[0, 22].unpack('CCv4V2v2')
			@index_data = header_data[22..-1]

			raise FormatError if @num_list != schema.length or seven_c != 0x7c
			# another check
			min_size = schema.inject(0) { |total, col| total + col.size }
			# seem to have at max, 8 padding bytes on the end of the record. not sure if it means
			# anything. maybe its just space that hasn't been reclaimed due to columns being
			# removed or something. probably should just check lower bound. 
			range = (min_size..min_size + 8)
			warn "rec_size seems wrong (#{range} !=== #{rec_size})" unless range === rec_size

			header_data2 = get_data_indirect b_five_offset
			raise FormatError if header_data2.length < 8
			signature, offset2 = header_data2.unpack 'V2'
			# ??? seems a bit iffy
			# there's probably more to the differences than this, and the data2 difference below
			expect = desc.pst.header.version_2003? ? 0x000404b5 : 0x000204b5
			raise FormatError, 'unhandled block signature 0x%08x' % signature if signature != expect

			# this holds all the row data
			# handle multiple block issue.
			@data3_io = get_data_indirect_io ind2_offset
			if RangesIOIdxChain === @data3_io
				@data3_idxs = 
				# modify ranges
				ranges = @data3_io.ranges.map { |offset, size| [offset, size / @rec_size * @rec_size] }
				@data3_io.instance_variable_set :@ranges, ranges
			end
			@data3 = @data3_io.read

			# there must be something to the data in data2. i think data2 is the array of objects essentially.
			# currently its only used to imply a length
			# actually, at size 6, its just some auxiliary data. i'm thinking either Vv/vV, for 97, and something
			# wider for 03. the second value is just the index (0...length), and the first value is
			# some kind of offset i expect. actually, they were all id2 values, in another case.
			# so maybe they're get_data_indirect values too?
			# actually, it turned out they were identical to the PR_ATTACHMENT_ID2 values...
			# id2_values = ie, data2.unpack('v*').to_enum(:each_slice, 3).transpose[0]
			# table[i].assoc(PR_ATTACHMENT_ID2).last == id2_values[i], for all i. 
			@data2 = get_data_indirect(offset2) rescue nil
			#if data2
			#	@length = (data2.length / 6.0).ceil
			#else
			# the above / 6, may have been ok for 97 files, but the new 0x0004 style block must have
			# different size records... just use this instead:
				# hmmm, actually, we can still figure it out:
				@length = @data3.length / @rec_size
			#end

			# lets try and at least use data2 for a warning for now
			if data2
				data2_rec_size = desc.pst.header.version_2003? ? 8 : 6
				warn 'somthing seems wrong with data3' unless @length == (data2.length / data2_rec_size)
			end
		end

		def schema
			@schema ||= index_data.scan(/.{8}/m).map { |data| Column.new data }
		end

		def [] idx
			# handle funky rounding
			Row.new self, idx * @rec_size
		end

		def each
			length.times { |i| yield self[i] }
		end

		class Row
			include Enumerable

			def initialize array_parser, x
				@array_parser, @x = array_parser, x
			end

			# iterate through the property tuples
			def each
				(@array_parser.index_data.length / 8).times do |i|
					ref_type, type, ind2_off, size, slot = @array_parser.index_data[8 * i, 8].unpack 'v3CC'
					# check this rescue too
					value = @array_parser.data3[@x + ind2_off, size]
#					if INDIRECT_TYPES.include? ref_type
					if size <= 4
						value = value.unpack('V')[0]
					end
					#p ['0x%04x' % ref_type, '0x%04x' % type, (Msg::Properties::MAPITAGS['%04x' % type].first[/^.._(.*)/, 1].downcase rescue nil),
					#		value_orig, value, (get_data_indirect(value_orig.unpack('V')[0]) rescue nil), size, ind2_off, slot]
					key, type, value = @array_parser.handle_indirect_values type, ref_type, value
					yield key, type, value
				end
			end
		end
	end

	class AttachmentTable < BlockParser
		# a "fake" MAPI property name for this constant. if you get a mapi property with
		# this value, it is the id2 value to use to get attachment data.
		PR_ATTACHMENT_ID2 = 0x67f2

		attr_reader :desc, :table
		def initialize desc
			@desc = desc
			# no super, we only actually want BlockParser2#idx2
			@table = nil
			return unless desc.list_index
			return unless idx = idx2[ID2_ATTACHMENTS]
			# FIXME make a fake desc.
			@desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
			@table = RawPropertyStoreTable.new @desc2
		end

		def to_a
			return [] if !table
			table.map do |attachment|
				attachment = attachment.to_a
				#p attachment
				# potentially merge with yet more properties
				# this still seems pretty broken - especially the property overlap
				if attachment_id2 = attachment.assoc(PR_ATTACHMENT_ID2)
					#p attachment_id2.last
					#p idx2[attachment_id2.last]
					@desc2.desc = idx2[attachment_id2.last]
					RawPropertyStore.new(@desc2).each do |a, b, c|
						record = attachment.assoc a
						attachment << record = [] unless record
						record.replace [a, b, c]
					end
				end
				attachment
			end
		end
	end

	# there is no equivalent to this in libpst. ID2_RECIPIENTS was just guessed given the above
	# AttachmentTable.
	class RecipientTable < BlockParser
		attr_reader :desc, :table
		def initialize desc
			@desc = desc
			# no super, we only actually want BlockParser2#idx2
			@table = nil
			return unless desc.list_index
			return unless idx = idx2[ID2_RECIPIENTS]
			# FIXME make a fake desc.
			desc2 = OpenStruct.new :desc => idx, :pst => desc.pst, :list_index => desc.list_index
			@table = RawPropertyStoreTable.new desc2
		end

		def to_a
			return [] if !table
			table.map { |x| x.to_a }
		end
	end

	#
	# higher level item code. wraps up the raw properties above, and gives nice
	# objects to work with. handles item relationships too.
	# ----------------------------------------------------------------------------
	#

	def self.make_property_set property_list
		hash = property_list.inject({}) do |hash, (key, type, value)|
			hash.update PropertySet::Key.new(key) => value
		end
		PropertySet.new hash
	end

	class Attachment < Mapi::Attachment
		def initialize list
			super Pst.make_property_set(list)

			@embedded_msg = props.attach_data if Item === props.attach_data
		end
	end

	class Recipient < Mapi::Recipient
		def initialize list
			super Pst.make_property_set(list)
		end
	end

	class Item < Mapi::Message
		class EntryID < Struct.new(:u1, :entry_id, :id)
			UNPACK_STR = 'VA16V'

			def initialize data
				data = data.unpack(UNPACK_STR) if String === data
				super(*data)
			end
		end

		include RecursivelyEnumerable

		attr_accessor :type, :parent

		def initialize desc, list, type=nil
			@desc = desc
			super Pst.make_property_set(list)

			# this is kind of weird, but the ids of the special folders are stored in a hash
			# when the root item is loaded
			if ipm_wastebasket_entryid
				desc.pst.special_folder_ids[ipm_wastebasket_entryid] = :wastebasket
			end

			if finder_entryid
				desc.pst.special_folder_ids[finder_entryid] = :finder
			end

			# and then here, those are used, along with a crappy heuristic to determine if we are an
			# item
=begin
i think the low bits of the desc_id can give some info on the type.

it seems that 0x4 is for regular messages (and maybe contacts etc)
0x2 is for folders, and 0x8 is for special things like rules etc, that aren't visible.
=end
			unless type
				type = props.valid_folder_mask || ipm_subtree_entryid || props.content_count || props.subfolders ? :folder : :message
				if type == :folder
					type = desc.pst.special_folder_ids[desc.desc_id] || type
				end
			end

			@type = type
		end

		def each_child
			id = ipm_subtree_entryid
			if id
				root = @desc.pst.desc_from_id id
				raise "couldn't find root" unless root
				raise 'both kinds of children' unless @desc.children.empty?
				children = root.children
				# lets look up the other ids we have.
				# typically the wastebasket one "deleted items" is in the children already, but
				# the search folder isn't.
				extras = [ipm_wastebasket_entryid, finder_entryid].compact.map do |id|
					root = @desc.pst.desc_from_id id
					warn "couldn't find root for id #{id}" unless root
					root
				end.compact
				# i do this instead of union, so as not to mess with the order of the
				# existing children.
				children += (extras - children)
				children
			else
				@desc.children
			end.each do |desc|
				item = @desc.pst.pst_parse_item(desc)
				item.parent = self
				yield item
			end
		end

		def path
			parents, item = [], self
			parents.unshift item while item = item.parent
			# remove root
			parents.shift
			parents.map { |item| item.props.display_name or raise 'unable to construct path' } * '/'
		end

		def children
			to_enum(:each_child).to_a
		end

		# these are still around because they do different stuff

		# Top of Personal Folder Record
		def ipm_subtree_entryid
			@ipm_subtree_entryid ||= EntryID.new(props.ipm_subtree_entryid.read).id rescue nil
		end

		# Deleted Items Folder Record
		def ipm_wastebasket_entryid
			@ipm_wastebasket_entryid ||= EntryID.new(props.ipm_wastebasket_entryid.read).id rescue nil
		end

		# Search Root Record
		def finder_entryid
			@finder_entryid ||= EntryID.new(props.finder_entryid.read).id rescue nil
		end

		# all these have been replaced with the method_missing below
=begin
		# States which folders are valid for this message store 
		#def valid_folder_mask
		#	props[0x35df]
		#end

		# Number of emails stored in a folder
		def content_count
			props[0x3602] 
		end

		# Has children
		def subfolders
			props[0x360a]
		end
=end

		# i think i will change these, so they can inherit the lazyness from RawPropertyStoreTable.
		# so if you want the last attachment, you can get it without creating the others perhaps.
		# it just has to handle the no table at all case a bit more gracefully.

		def attachments
			@attachments ||= AttachmentTable.new(@desc).to_a.map { |list| Attachment.new list }
		end

		def recipients
			#[]
			@recipients ||= RecipientTable.new(@desc).to_a.map { |list| Recipient.new list }
		end

		def each_recursive(&block)
			#p :self => self
			children.each do |child|
				#p :child => child
				block[child]
				child.each_recursive(&block)
			end
		end

		def inspect
			attrs = %w[display_name subject sender_name subfolders]
#			attrs = %w[display_name valid_folder_mask ipm_wastebasket_entryid finder_entryid content_count subfolders]
			str = attrs.map { |a| b = props.send a; " #{a}=#{b.inspect}" if b }.compact * ','

			type_s = type == :message ? 'Message' : type == :folder ? 'Folder' : type.to_s.capitalize + 'Folder'
			str2 = 'desc_id=0x%x' % @desc.desc_id

			!str.empty? ? "#<Pst::#{type_s} #{str2}#{str}>" : "#<Pst::#{type_s} #{str2} props=#{props.inspect}>" #\n" + props.transport_message_headers + ">"
		end
	end

	# corresponds to
	# * _pst_parse_item
	def pst_parse_item desc
		Item.new desc, RawPropertyStore.new(desc).to_a
	end

	#
	# other random code
	# ----------------------------------------------------------------------------
	#

	def dump_debug_info
		puts "* pst header"
		p header

=begin
Looking at the output of this, for blank-o1997.pst, i see this part:
...
- (26624,516) desc block data (overlap of 4 bytes)
- (27136,516) desc block data (gap of 508 bytes)
- (28160,516) desc block data (gap of 2620 bytes)
...

which confirms my belief that the block size for idx and desc is more likely 512
=end
		if 0 + 0 == 0
			puts '* file range usage'
			file_ranges =
				# these 3 things, should account for most of the data in the file.
				[[0, Header::SIZE, 'pst file header']] +
				@idx_offsets.map { |offset| [offset, Index::BLOCK_SIZE, 'idx block data'] } +
				@desc_offsets.map { |offset| [offset, Desc::BLOCK_SIZE, 'desc block data'] } +
				@idx.map { |idx| [idx.offset, idx.size, 'idx id=0x%x (%s)' % [idx.id, idx.type]] }
			(file_ranges.sort_by { |idx| idx.first } + [nil]).to_enum(:each_cons, 2).each do |(offset, size, name), next_record|
				# i think there is a padding of the size out to 64 bytes
				# which is equivalent to padding out the final offset, because i think the offset is 
				# similarly oriented
				pad_amount = 64
				warn 'i am wrong about the offset padding' if offset % pad_amount != 0
				# so, assuming i'm not wrong about that, then we can calculate how much padding is needed.
				pad = pad_amount - (size % pad_amount)
				pad = 0 if pad == pad_amount
				gap = next_record ? next_record.first - (offset + size + pad) : 0
				extra = case gap <=> 0
					when -1; ["overlap of #{gap.abs} bytes)"]
					when  0; []
					when +1; ["gap of #{gap} bytes"]
				end
				# how about we check that padding
				@io.pos = offset + size
				pad_bytes = @io.read(pad)
				extra += ["padding not all zero"] unless pad_bytes == 0.chr * pad
				puts "- #{offset}:#{size}+#{pad} #{name.inspect}" + (extra.empty? ? '' : ' [' + extra * ', ' + ']')
			end
		end

		# i think the idea of the idx, and indeed the idx2, is just to be able to
		# refer to data indirectly, which means it can get moved around, and you just update
		# the idx table. it is simply a list of file offsets and sizes.
		# not sure i get how id2 plays into it though....
		# the sizes seem to be all even. is that a co-incidence? and the ids are all even. that
		# seems to be related to something else (see the (id & 2) == 1 stuff)
		puts '* idx entries'
		@idx.each { |idx| puts "- #{idx.inspect}" }

		# if you look at the desc tree, you notice a few things:
		# 1. there is a desc that seems to be the parent of all the folders, messages etc.
		#    it is the one whose parent is itself.
		#    one of its children is referenced as the subtree_entryid of the first desc item,
		#    the root.
		# 2. typically only 2 types of desc records have idx2_id != 0. messages themselves,
		#    and the desc with id = 0x61 - the xattrib container. everything else uses the
		#    regular ids to find its data. i think it should be reframed as small blocks and
		#    big blocks, but i'll look into it more.
		#
		# idx_id and idx2_id are for getting to the data. desc_id and parent_desc_id just define
		# the parent <-> child relationship, and the desc_ids are how the items are referred to in
		# entryids.
		# note that these aren't unique! eg for 0, 4 etc. i expect these'd never change, as the ids
		# are stored in entryids. whereas the idx and idx2 could be a bit more volatile.
		puts '* desc tree'
		# make a dummy root hold everything just for convenience
		root = Desc.new ''
		def root.inspect; "#<Pst::Root>"; end
		root.children.replace @orphans
		# this still loads the whole thing as a string for gsub. should use directo output io
		# version.
		puts root.to_tree.gsub(/, (parent_desc_id|idx2_id)=0x0(?!\d)/, '')

		# this is fairly easy to understand, its just an attempt to display the pst items in a tree form
		# which resembles what you'd see in outlook.
		puts '* item tree'
		# now streams directly
		root_item.to_tree STDOUT
	end

	def root_desc
		@desc.first
	end

	def root_item
		item = pst_parse_item root_desc
		item.type = :root
		item
	end

	def root
		root_item
	end

	# depth first search of all items
	include Enumerable

	def each(&block)
		root = self.root
		block[root]
		root.each_recursive(&block)
	end

	def name
		@name ||= root_item.props.display_name
	end
	
	def inspect
		"#<Pst name=#{name.inspect} io=#{io.inspect}>"
	end
end
end