aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/ruby-msg/lib/rtf.rb
blob: 3afac68a8536fca90a396af2355355d6bd17b652 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
require 'stringio'

# This file is a crude, best-effort RTF reader. It exists only to ensure there is
# always something readable when a message has an RTF-only body with no HTML encapsulation.

module RTF
	# Splits a raw RTF byte stream into a flat sequence of tokens.
	class Tokenizer
		# Reads +io+ character by character and yields one token per call to the block.
		#
		# Tokens yielded:
		#   :open_group                 - an unescaped "{"
		#   :close_group                - an unescaped "}"
		#   :text, string               - a literal character, an escaped "{", "}" or "\",
		#                                 or the byte decoded from a \'hh hex escape
		#   :control_word, name, num    - \name with an optional (possibly negative) integer
		#                                 parameter; num is nil when no parameter was given
		#   :control_symbol, char       - a backslash followed by any other character
		#
		# Bare CR and LF characters are skipped.
		#
		# +io+ must respond to getc, read and seek (e.g. a StringIO).
		# Returns an Enumerator over the tokens when no block is given.
		# Raises RuntimeError ("invalid rtf stream") when the stream ends right after a
		# backslash or a \' escape, or when a "-" after a control word is not followed by a digit.
		def self.process io
			return enum_for(:process, io) unless block_given?

			while (c = io.getc)
				case c
				when ?{ then yield :open_group
				when ?} then yield :close_group
				when ?\\
					case c = io.getc
					when ?{, ?}, ?\\ then yield :text, c.chr
					when ?'
						hex = io.read(2)
						raise "invalid rtf stream" unless hex # \'EOF
						yield :text, [hex].pack('H*')
					when ?a..?z, ?A..?Z
						# read control word
						word = c.chr
						word << c while (c = io.read(1)) && c =~ /[a-zA-Z]/
						sign = 1
						if c == '-'
							sign = -1
							c = io.read(1)
						end
						num = nil
						if c =~ /[0-9]/
							digits = c
							digits << c while (c = io.read(1)) && c =~ /[0-9]/
							num = digits.to_i * sign
						end
						raise "invalid rtf stream" if sign == -1 && !num # ???? \blahblah- some text
						# A single space is part of the control word and is swallowed. Any other
						# delimiter belongs to the following token, so push it back. At EOF (c is
						# nil) nothing was consumed, so seeking back would replay the last character.
						io.seek(-1, IO::SEEK_CUR) if c && c != ' '
						yield :control_word, word, num
					when nil
						raise "invalid rtf stream" # \EOF
					else
						# other kind of control symbol
						yield :control_symbol, c.chr
					end
				when ?\r, ?\n
					# ignore
				else
					yield :text, c.chr
				end
			end
		end
	end

	# Best-effort conversion of an RTF document to plain text or very minimal HTML.
	class Converter
		# Control words that stand for a single character, and the text emitted for them.
		# \bullet is still not handled.
		CONTROL_WORD_TEXT = {
			'par' => "\n", 'line' => "\n", 'page' => "\n",
			'tab' => "\t", 'cell' => "\t",
			'endash' => '-', 'emdash' => '-',
			'emspace' => ' ', 'enspace' => ' ', 'qmspace' => ' ',
			'ldblquote' => '"', 'rdblquote' => '"',
			'lquote' => "'", 'rquote' => "'"
		}.freeze

		# Control symbols that stand for a character.
		CONTROL_SYMBOL_TEXT = {
			'~' => ' ', # non-breakable space
			'_' => '-'  # non-breakable hyphen
		}.freeze

		# Characters escaped in HTML output, mapped to their entity names.
		HTML_ENTITIES = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }.freeze

		# Converts the RTF source +str+ to a String.
		#
		# +format+ is :text (default) for plain text, or :html to wrap the result in
		# <html><body>, escape markup characters, add <br> after line breaks and map
		# \b / \i (and \b0 / \i0) to <b> / <i> tags. Tags still open when their group
		# closes are closed automatically.
		#
		# Literal text is kept only at group depth 0 or 1, or inside a group whose first
		# control word is "rtlch" and which has no enclosing group introduced by "*".
		#
		# Errors raised by RTF::Tokenizer for a malformed stream propagate to the caller.
		def self.rtf2text str, format=:text
			html = format == :html
			depth = 0
			text = ''.dup
			text << "<html>\n<body>" if html
			# Both stacks are indexed by group depth. Depth 0 (outside any group) is seeded
			# so control words or a stray "}" before the first "{" cannot hit nil.
			group_type = [nil]
			group_tags = [[]]
			RTF::Tokenizer.process(StringIO.new(str)) do |kind, value, param|
				piece = ''
				case kind
				when :open_group
					depth += 1
					group_type[depth] = nil
					group_tags[depth] = []
				when :close_group
					# an unbalanced "}" has no group to close; ignore it
					next if depth.zero?
					group_tags[depth].reverse_each { |tag| text << "</#{tag}>" }
					depth -= 1
				when :control_word
					# the first control word/symbol seen in a group identifies the group
					group_type[depth] ||= value
					# maybe change this to use utf8 where possible
					piece = CONTROL_WORD_TEXT.fetch(value, '')
					if html && (value == 'b' || value == 'i')
						if param == 0
							# \b0 / \i0 switches the attribute off
							text << "</#{value}>"
							group_tags[depth].delete value
						else
							text << "<#{value}>"
							group_tags[depth] << value
						end
					end
				when :control_symbol
					group_type[depth] ||= value
					piece = CONTROL_SYMBOL_TEXT.fetch(value, '')
				when :text
					visible = depth <= 1 ||
						(group_type[depth] == 'rtlch' && !group_type[0...depth].include?('*'))
					piece = value if visible
				end
				if html
					text << piece.gsub(/[<>&"']/) { |ch| "&#{HTML_ENTITIES[ch]};" }
					text << '<br>' if piece == "\n"
				else
					text << piece
				end
			end
			text << "</body>\n</html>\n" if html
			text
		end
	end
end