1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
/* LanguageTool, a natural language style checker
* Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package de.danielnaber.languagetool.rules.en;
import java.util.ResourceBundle;
import java.util.regex.Pattern;
import de.danielnaber.languagetool.AnalyzedTokenReadings;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.GenericUnpairedBracketsRule;
/**
 * Unpaired-brackets rule set up with the English opening and closing symbols
 * and with English-specific exceptions for quote characters that are not
 * acting as brackets (inch marks, possessive and elision apostrophes).
 */
public class EnglishUnpairedBracketsRule extends GenericUnpairedBracketsRule {

  /** Opening symbols; each entry pairs with the same index in {@link #EN_END_SYMBOLS}. */
  private static final String[] EN_START_SYMBOLS = { "[", "(", "{", "“", "\"", "'" };

  /** Closing symbols, index-aligned with {@link #EN_START_SYMBOLS}. */
  private static final String[] EN_END_SYMBOLS = { "]", ")", "}", "”", "\"", "'" };

  /** Matches a token made up only of digits. */
  private static final Pattern NUMBER = Pattern.compile("\\d+");

  /**
   * Creates the rule and installs the English symbol tables.
   *
   * @param messages resource bundle handed to the superclass
   * @param language language handed to the superclass
   */
  public EnglishUnpairedBracketsRule(final ResourceBundle messages,
      final Language language) {
    super(messages, language);
    startSymbols = EN_START_SYMBOLS;
    endSymbols = EN_END_SYMBOLS;
  }

  /**
   * @return the fixed rule identifier {@code "EN_UNPAIRED_BRACKETS"}
   */
  public String getId() {
    return "EN_UNPAIRED_BRACKETS";
  }

  /**
   * Decides whether the symbol at position {@code i} is NOT one of the English
   * exceptions handled here.
   *
   * <p>Known gaps carried over from the original TODO list: an', o', 'till,
   * 'tain't, 'cept, 'fore are not yet marked up as contractions in the
   * disambiguator, and dates like '52 are not excepted.
   *
   * @param token     text of the symbol being examined
   * @param tokens    tokens of the sentence
   * @param i         index of the symbol within {@code tokens}
   * @param j         not used by this implementation
   * @param precSpace whether whitespace precedes the symbol
   * @param follSpace whether whitespace follows the symbol
   * @return {@code false} when one of the exceptions below applies,
   *         {@code true} otherwise (always {@code true} for {@code i <= 1})
   */
  protected boolean isNoException(final String token,
      final AnalyzedTokenReadings[] tokens, final int i, final int j, final boolean precSpace,
      final boolean follSpace) {
    // Nothing is excepted this early in the sentence; this also keeps the
    // look-behind at tokens[i - 1] in the helper away from negative indices.
    if (i <= 1) {
      return true;
    }
    final boolean attachedToPrecedingWord = !precSpace && follSpace;
    if (attachedToPrecedingWord) {
      return !isTrailingQuoteException(token, tokens, i);
    }
    final boolean attachedToFollowingWord = precSpace && !follSpace;
    if (attachedToFollowingWord) {
      return !isLeadingApostropheException(token, tokens, i);
    }
    return true;
  }

  /**
   * Exceptions for a quote character glued to the word before it and followed
   * by whitespace.
   */
  private static boolean isTrailingQuoteException(final String token,
      final AnalyzedTokenReadings[] tokens, final int i) {
    if ("\"".equals(token)) {
      // English inches, e.g., 20"
      return NUMBER.matcher(tokens[i - 1].getToken()).matches();
    }
    if ("'".equals(token)) {
      // English plural Saxon genitive; the current disambiguation scheme
      // is a bit too greedy for adjectives.
      if (tokens[i].hasPosTag("POS")) {
        return true;
      }
      // Dropped final "g": puttin' on the Ritz
      final AnalyzedTokenReadings wordBefore = tokens[i - 1];
      return wordBefore.hasPosTag("VBG") && wordBefore.getToken().endsWith("in");
    }
    return false;
  }

  /**
   * Exceptions for an apostrophe preceded by whitespace and glued to the word
   * after it.
   */
  private static boolean isLeadingApostropheException(final String token,
      final AnalyzedTokenReadings[] tokens, final int i) {
    if (!"'".equals(token)) {
      return false;
    }
    // hold 'em!
    final int next = i + 1;
    return next < tokens.length && "em".equals(tokens[next].getToken());
  }
}
|