1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
|
: Use /bin/bash
# Copyright: Rune Kleveland (2000) <runekl@math.uio.no>
# License : GPL
# Version : 0.9
# This script takes a (TeX) file as argument and prints every word not
# in the Norwegian dictionary hyphenated by TeX. The parsing of the
# file is done by ispell.
# It can be used to find out which words the Norwegian patterns
# hyphenate incorrectly, because the Norwegian patterns I have
# generated should hyphenate every word in the norsk and nynorsk
# dictionaries correctly. The incorrectly hyphenated words should be
# included in a \hyphenation command. If there already are
# hyphenation commands in the TeX document, that is taken into
# consideration unless the -noparse option is given.
# If you tell me about incorrectly hyphenated common words, I might
# fix it in a future version.
# If multi level hyphenation is available, it is used. Unfortunately
# this requires an experimental TeX today, and few people seem to have
# that.
# nohyphinsecure [-p patterns] [-l language] [-ll language] [-e] [-h]
#                [-nosort] [-nroff] [-noparse] [file]
#
# -p patterns Choose the patterns to hyphenate with.
# The default is norsk, in which case TeX
# executes \language=\l@norsk.
#
# -l language Choose the ispell dictionary. The default is norsk.
#
# -ll language Filter through extra dictionary. The default is nynorsk.
# Use false to avoid filtering.
#
# -e Throw away all english words.
#
# -h Print only words that do contain a hyphen
#
# -nosort Do not sort the words, but output in the order they appear
#
# -nroff Ispell parsing keyword. Overrides the default, TeX.
#
# -noparse Don't try to find hyphenation commands in the input file.
# ---- Default settings; the option loop may override most of them ----

# Scratch directory. The script writes fixed names (hyphen0..3.tmp), changes
# into this directory to run LaTeX and finally removes minimal.log/.aux/.dvi
# there. Doing that directly in the shared /tmp is unsafe (predictable names,
# clashes between concurrent runs, unrelated minimal.* files deleted), so a
# private directory is created instead. It stays below /tmp and contains no
# blanks, so unquoted uses of ${TMP} keep working.
TMP=$(mktemp -d /tmp/nohyphinsecure.XXXXXX) || {
    echo 'nohyphinsecure: cannot create a temporary directory' 1>&2
    exit 1
}
# Remove the scratch directory on every exit path, including signals.
trap 'rm -rf -- "${TMP}"' EXIT
trap 'exit 1' HUP INT TERM

ISPELLMODE=tex          # not referenced in the visible part of the script
LATEX=latex             # TeX executable used for the hyphenation run
LANGUAGE=norsk          # ispell dictionary (-l); "false" skips ispell
LLANGUAGE=nynorsk       # second ispell dictionary (-ll); "false" skips it
# Word characters handed to tr(1) when ispell is skipped: ASCII letters plus
# the accented letters as single bytes, written as octal escapes (which tr
# interprets itself) so this file stays pure ASCII.
# Lower case: ae-ligature, o-slash, a-ring, c-cedilla, e-grave, e-acute,
# e-circumflex, o-grave, o-acute, o-circumflex; then the upper-case forms.
# NOTE(review): nine letters per case are taken from the ^^xx table used when
# the log is parsed; the tenth (e-circumflex, \352/\312) is inferred because
# the original characters were unreadable -- confirm against a pristine copy.
CH='a-z\346\370\345\347\350\351\352\362\363\364A-Z\306\330\305\307\310\311\312\322\323\324'
PATTERNS=${LANGUAGE}    # hyphenation patterns (-p); TeX gets \language=\l@<PATTERNS>
IGNOREENGLISH=false     # -e: drop words the english dictionary accepts
FORMAT=-t               # ispell input mode: -t (TeX) or -n (nroff, set by -nroff)
ONLYHYPHEN=false        # -h: print only words containing a hyphen
PARSEFORHYPH=true       # -noparse: do not look for \hyphenation in the input
SORTING=true            # -nosort: keep words in order of appearance
# Print the usage summary on stderr and terminate with exit status 2.
usage() {
    echo 'Usage: nohyphinsecure [-p patterns] [-l dictionary] [-ll dictionary] [-e] [-h] [-nosort] [-nroff] [-noparse] [file] ...' \
        1>&2
    exit 2
}

# Interpret the leading options of the given argument list.
# Arguments: the script's command line.
# Globals written: PATTERNS, LANGUAGE, LLANGUAGE, IGNOREENGLISH, ONLYHYPHEN,
#                  FORMAT, PARSEFORHYPH, SORTING, and NOPTARGS (the number of
#                  leading arguments that were consumed as options).
# Parsing stops at the first non-option argument or at a lone "-", both of
# which are left in place for the caller. An unknown option, or a value
# option without its value, prints the usage text and exits with status 2.
parse_options() {
    local total=$#
    while [ $# -gt 0 ]
    do
        case "$1" in
            -p | -l | -ll)
                # These take a value. Without this check a trailing "-p"
                # would silently store an empty string and shift past the end.
                [ $# -ge 2 ] || usage
                case "$1" in
                    -p) PATTERNS=$2 ;;
                    -l) LANGUAGE=$2 ;;
                    -ll) LLANGUAGE=$2 ;;
                esac
                shift
                ;;
            -e)
                IGNOREENGLISH=true
                ;;
            -h)
                ONLYHYPHEN=true
                ;;
            -nroff)
                FORMAT=-n
                ;;
            -noparse)
                PARSEFORHYPH=false
                ;;
            -nosort)
                SORTING=false
                ;;
            -)
                break
                ;;
            -*)
                usage
                ;;
            *)
                break
                ;;
        esac
        shift
    done
    NOPTARGS=$((total - $#))
}

# A function cannot shift the script's own positional parameters, so it
# reports how many it consumed and they are dropped here.
parse_options "$@"
shift "${NOPTARGS}"
# Parse for \hyphenation command. Assumes you use TeX gently.
#
# Stage 1 (first `if`): emit the input text on stdout. When parsing is
# enabled, the input is first saved as hyphen0.tmp and its \hyphenation
# commands are extracted to hyphen1.tmp:
#   - s/%.*//         drops everything from the first % on each line;
#   - the `p` rule    prints lines holding a complete \hyphenation{...};
#   - the `! D` rule  deletes every line outside a range that starts at a
#                     \hyphenation{ left open on its line and ends at a line
#                     containing };
#   - the second sed  strips whatever precedes \hyphenation{ on a line.
# With -noparse any stale hyphen1.tmp is removed instead.
#
# Stage 2 (second `if`): reduce the text to a word list in hyphen2.tmp.
#   LANGUAGE=false   no ispell; split on every character not in ${CH};
#   LLANGUAGE=false  one `ispell -l` pass with the ${LANGUAGE} dictionary;
#   otherwise        a second `ispell -l` pass with ${LLANGUAGE}.
#
# All expansions are quoted so file names with blanks or glob characters and
# empty settings are handled; `--` keeps cat from treating a file name that
# starts with "-" as an option (a lone "-" still means standard input).
if [ "${PARSEFORHYPH}" = true ]
then
    cat -- "$@" > "${TMP}/hyphen0.tmp"
    sed -e 's/%.*//' \
        -e '/\\hyphenation[ ]*{.*}/ p' \
        -e '/\\hyphenation[ ]*{[^}]*$/,/}/! D' "${TMP}/hyphen0.tmp" \
      | sed -e 's/^.\+\(\\hyphenation[ ]*{\)/\1/' \
      > "${TMP}/hyphen1.tmp"
    cat "${TMP}/hyphen0.tmp"
else
    rm -f "${TMP}/hyphen1.tmp"
    cat -- "$@"
fi | \
if [ "${LANGUAGE}" = false ]
then
    tr -cs "${CH}" '\n'
elif [ "${LLANGUAGE}" = false ]
then
    tr '.,;:' ' ' \
      | ispell -B -l -d "${LANGUAGE}" "${FORMAT}"
else
    tr '.,;:' ' ' \
      | ispell -B -l -d "${LANGUAGE}" "${FORMAT}" \
      | ispell -B -l -d "${LLANGUAGE}" "${FORMAT}"
fi \
    > "${TMP}/hyphen2.tmp"
# Everything below uses relative file names and ends by deleting files, so
# refuse to continue if the scratch directory cannot be entered.
cd "${TMP}" || {
    echo "nohyphinsecure: cannot change to ${TMP}" 1>&2
    exit 1
}

# Turn the word list hyphen2.tmp into TeX input hyphen3.tmp:
#   1. with -e, words made only of ASCII letters go through
#      `ispell -l -d english`; words containing any other character are
#      passed on unchanged;
#   2. unless -nosort was given, identical words are counted (`uniq -c`) and
#      listed by descending count (`sort -n -r -s`);
#   3. sed inserts \writelog{ before the first line, closes and reopens the
#      group after every 1000th line (GNU first~step address) and appends the
#      final }.
# The text lines of the sed i/a commands must stay flush left.
if [ "${IGNOREENGLISH}" = true ]
then
    (grep -v '[^a-zA-Z]' hyphen2.tmp | ispell -l -d english; \
     grep '[^a-zA-Z]' hyphen2.tmp)
else
    cat hyphen2.tmp
fi | \
if [ "${SORTING}" = true ]
then
    sort \
      | uniq -c \
      | sort -n -r -s
else
    cat
fi \
  | sed -e '1 i \
\\writelog{' \
        -e '1000~1000 a \
}\
\\writelog{' \
        -e '$ a \
}' \
    > hyphen3.tmp
# LaTeX driver, passed to ${LATEX} on the command line. It selects the
# hyphenation patterns \l@<PATTERNS> with both hyphen minima set to 2, enables
# multi-level hyphenation when \gendiscretionary is defined, reads the
# extracted \hyphenation commands from hyphen1.tmp if that file exists, and
# defines \writelog, which sets its argument in a \vbox with tolerances and
# \hbadness chosen so that TeX reports the box contents in the log. The
# \typeout{----------} line marks the end of the relevant log section.
TEXFILE='\nonstopmode
\documentclass{minimal}
\usepackage{t1enc}
\makeatletter
\language=\l@'${PATTERNS}'\lefthyphenmin=2\righthyphenmin=2
\ifx\gendiscretionary\@undefined\else
\hyphenclassesstate=1\hyphenclasses=5\exhyphenclass=4
\fi
\InputIfFileExists{./hyphen1.tmp}{}
\makeatother
\def\writelog#1{\setbox0=\vbox{\parfillskip0pt \hsize16383.99999pt
\pretolerance=-1 \tolerance=-1 \hbadness=0 \showboxdepth=0 \ #1}}
\begin{document}
\input{hyphen3.tmp}
\typeout{----------}
\end{document}'
# ${TEXFILE} is deliberately left unquoted: word splitting turns its newlines
# into argument boundaries. Globbing is switched off meanwhile so no word can
# be expanded as a file name pattern.
# The original "2&>/dev/null" did not redirect fd 2; bash passed a literal
# "2" to LaTeX as an extra argument. Both output streams are discarded here;
# the result is read from minimal.log.
set -f
# shellcheck disable=SC2086
${LATEX} ${TEXFILE} >/dev/null 2>&1
set +f
rm -f hyphen[0123].tmp
# Filter stdin to stdout, replacing TeX's ^^xx log notation for the accented
# letters with the single byte of that hexadecimal value (the T1 slot, since
# the document loads t1enc). The replacements are written as \xHH escapes
# (GNU sed; the script already relies on GNU sed addresses) because the
# literal letters were unreadable in the file as received, and so that the
# script does not depend on the encoding of its own source.
decode_t1_chars() {
    sed -e 's/\^\^c5/\xc5/g' \
        -e 's/\^\^c6/\xc6/g' \
        -e 's/\^\^d8/\xd8/g' \
        -e 's/\^\^c7/\xc7/g' \
        -e 's/\^\^c8/\xc8/g' \
        -e 's/\^\^c9/\xc9/g' \
        -e 's/\^\^d2/\xd2/g' \
        -e 's/\^\^d3/\xd3/g' \
        -e 's/\^\^d4/\xd4/g' \
        -e 's/\^\^e5/\xe5/g' \
        -e 's/\^\^e6/\xe6/g' \
        -e 's/\^\^f8/\xf8/g' \
        -e 's/\^\^e7/\xe7/g' \
        -e 's/\^\^e8/\xe8/g' \
        -e 's/\^\^e9/\xe9/g' \
        -e 's/\^\^f2/\xf2/g' \
        -e 's/\^\^f3/\xf3/g' \
        -e 's/\^\^f4/\xf4/g'
}

# Parse the log file
#
# First sed, on minimal.log:
#   - delete everything up to and including the line mentioning (hyphen3.tmp;
#   - delete lines containing \hbox and blank lines;
#   - replace an optional leading [], blanks and the \T1... font token at the
#     start of a line with the marker *;
#   - replace everything from the ---------- marker line to the end with
#     empty text.
# tr then joins all lines (also deleting every ")") and puts each
# blank-separated token on its own line.
# Second sed: remove the first "-*" on a line, remove the marker from the
# first line, turn a remaining marker into a line break and drop blank lines.
# After decoding, and unless -nosort was given, each pair of lines is joined
# with a space (the pairing comes from `sed N`; presumably the pairs are the
# count/word tokens produced by `uniq -c` -- confirm). With -h only lines
# containing a hyphen are printed. Finally the LaTeX by-products are removed.
sed -e '1,/(hyphen3.tmp/ D' \
    -e '/\\hbox/ D' \
    -e '/^ *$/ D' \
    -e 's/^\(\[\]\)\? *\\T1[^ ]* /*/' \
    -e '/^----------/,$ c \
' minimal.log \
  | tr -d '\n)' \
  | tr -s ' ' '\n' \
  | sed -e 's/-\*//' \
        -e '1 s/\*//' \
        -e 's/\*/\
/' -e '/^ *$/ D' \
  | decode_t1_chars | \
if [ "${SORTING}" = true ]
then
    sed -e N -e 's/\n/ /'
else
    cat
fi | \
if [ "${ONLYHYPHEN}" = true ]
then
    # `-e -` matches a literal hyphen without the stray backslash of '\-',
    # which newer GNU grep versions warn about.
    grep -e -
else
    cat
fi
rm -f minimal.log minimal.aux minimal.dvi
|