summaryrefslogtreecommitdiffstats
path: root/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRule.java
blob: 4b32c05b33cc0441ba3178df32524559080218b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/* LanguageTool, a natural language style checker 
 * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

package de.danielnaber.languagetool.rules.en;

import java.util.ResourceBundle;
import java.util.regex.Pattern;

import de.danielnaber.languagetool.AnalyzedTokenReadings;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.GenericUnpairedBracketsRule;

/**
 * Unpaired-bracket rule configured with the English opening and closing
 * symbols, plus a handful of English-specific cases in which a lone quote
 * character is reported as "not a bracket" via
 * {@link #isNoException(String, AnalyzedTokenReadings[], int, int, boolean, boolean)}.
 */
public class EnglishUnpairedBracketsRule extends GenericUnpairedBracketsRule {

  private static final String DOUBLE_QUOTE = "\"";
  private static final String APOSTROPHE = "'";

  // The two arrays list the opening and closing symbols in the same order.
  private static final String[] EN_START_SYMBOLS = { "[", "(", "{", "“", DOUBLE_QUOTE, APOSTROPHE };
  private static final String[] EN_END_SYMBOLS   = { "]", ")", "}", "”", DOUBLE_QUOTE, APOSTROPHE };

  /** Matches a token that consists solely of digits. */
  private static final Pattern NUMBER = Pattern.compile("\\d+");

  /**
   * Creates the rule and installs the English symbol tables into the
   * inherited {@code startSymbols} / {@code endSymbols} fields.
   *
   * @param messages resource bundle handed on to the superclass
   * @param language language handed on to the superclass
   */
  public EnglishUnpairedBracketsRule(final ResourceBundle messages,
      final Language language) {
    super(messages, language);
    startSymbols = EN_START_SYMBOLS;
    endSymbols = EN_END_SYMBOLS;
  }

  /**
   * @return the fixed identifier of this rule, {@code "EN_UNPAIRED_BRACKETS"}
   */
  public String getId() {
    return "EN_UNPAIRED_BRACKETS";
  }

  /**
   * Decides whether the symbol at position {@code i} is one of the English
   * special cases (inch mark, possessive apostrophe, dropped-g apostrophe,
   * {@code 'em}).
   *
   * @param token     text of the symbol under inspection
   * @param tokens    the tokens surrounding (and including) the symbol
   * @param i         index of the symbol within {@code tokens}
   * @param j         not used by this implementation
   * @param precSpace whether the symbol is preceded by whitespace
   * @param follSpace whether the symbol is followed by whitespace
   * @return {@code false} when one of the special cases applies,
   *         {@code true} otherwise (including whenever {@code i <= 1})
   */
  protected boolean isNoException(final String token,
      final AnalyzedTokenReadings[] tokens, final int i, final int j, final boolean precSpace,
      final boolean follSpace) {

    // TODO: add an', o', 'till, 'tain't, 'cept, 'fore in the disambiguator
    // and mark them up as contractions somehow.
    // TODO: add an exception for dates like '52.

    // No special case is considered this close to the start of the array.
    if (i <= 1) {
      return true;
    }

    final boolean gluedToPrevious = !precSpace && follSpace;
    if (gluedToPrevious && isTrailingQuoteCase(token, tokens, i)) {
      return false;
    }

    final boolean gluedToNext = precSpace && !follSpace;
    if (gluedToNext && isLeadingApostropheCase(token, tokens, i)) {
      return false;
    }

    return true;
  }

  /**
   * Special cases for a quote character that directly follows a word and is
   * itself followed by whitespace.
   */
  private static boolean isTrailingQuoteCase(final String token,
      final AnalyzedTokenReadings[] tokens, final int i) {
    // English inches, e.g., 20"
    if (DOUBLE_QUOTE.equals(token)) {
      return NUMBER.matcher(tokens[i - 1].getToken()).matches();
    }
    if (!APOSTROPHE.equals(token)) {
      return false;
    }
    // English plural Saxon genitive; the current disambiguation scheme
    // is a bit too greedy for adjectives.
    if (tokens[i].hasPosTag("POS")) {
      return true;
    }
    // puttin' on the Ritz
    final AnalyzedTokenReadings previous = tokens[i - 1];
    return previous.hasPosTag("VBG") && previous.getToken().endsWith("in");
  }

  /**
   * Special case for an apostrophe that follows whitespace and is directly
   * attached to the next token: hold 'em!
   */
  private static boolean isLeadingApostropheCase(final String token,
      final AnalyzedTokenReadings[] tokens, final int i) {
    if (!APOSTROPHE.equals(token)) {
      return false;
    }
    final int next = i + 1;
    return next < tokens.length && "em".equals(tokens[next].getToken());
  }

}