diff options
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java')
-rw-r--r-- | JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java new file mode 100644 index 0000000..35ecfa4 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java @@ -0,0 +1,136 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.util.ArrayList; +import java.util.List; +import java.util.ResourceBundle; + +import de.danielnaber.languagetool.AnalyzedSentence; +import de.danielnaber.languagetool.AnalyzedTokenReadings; +import de.danielnaber.languagetool.Language; + +/** + * Checks that a sentence starts with an uppercase letter. + * + * @author Daniel Naber + */ +public class UppercaseSentenceStartRule extends Rule { + + private final Language language; + + private String lastParagraphString = ""; + + public UppercaseSentenceStartRule(final ResourceBundle messages, + final Language language) { + super(messages); + super.setCategory(new Category(messages.getString("category_case"))); + this.language = language; + } + + public final String getId() { + return "UPPERCASE_SENTENCE_START"; + } + + public final String getDescription() { + return messages.getString("desc_uppercase_sentence"); + } + + public final RuleMatch[] match(final AnalyzedSentence text) { + final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); + final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace(); + if (tokens.length < 2) { + return toRuleMatchArray(ruleMatches); + } + int matchTokenPos = 1; // 0 = SENT_START + final String firstToken = tokens[matchTokenPos].getToken(); + String secondToken = null; + String thirdToken = null; + // ignore quote characters: + if (tokens.length >= 3 + && ("'".equals(firstToken) || "\"".equals(firstToken) || "„" + .equals(firstToken))) { + matchTokenPos = 2; + secondToken = tokens[matchTokenPos].getToken(); + } + final String firstDutchToken = dutchSpecialCase(firstToken, secondToken, + tokens); + if (firstDutchToken != null) { + thirdToken = firstDutchToken; + matchTokenPos = 3; + } + + String checkToken = firstToken; + if (thirdToken != null) { + checkToken = thirdToken; + } else if (secondToken != null) { + checkToken = secondToken; + } + + final String lastToken = tokens[tokens.length - 1].getToken(); + + boolean noException = false; + //fix for lists; note - this will not always work for the last point in OOo, + //as OOo might serve paragraphs in any order. + if ((language == Language.RUSSIAN || language == Language.POLISH) + && (";".equals(lastParagraphString) || ";".equals(lastToken) + || ",".equals(lastParagraphString) || ",".equals(lastToken))) { + noException = true; + } + //fix for comma in last paragraph; note - this will not always work for the last point in OOo, + //as OOo might serve paragraphs in any order. + if ((language == Language.RUSSIAN || language == Language.ITALIAN + || language == Language.POLISH || language == Language.GERMAN) + && (",".equals(lastParagraphString))) { + noException = true; + } + + lastParagraphString = lastToken; + + if (checkToken.length() > 0) { + final char firstChar = checkToken.charAt(0); + if (Character.isLowerCase(firstChar) && (!noException)) { + final RuleMatch ruleMatch = new RuleMatch(this, tokens[matchTokenPos] + .getStartPos(), tokens[matchTokenPos].getStartPos() + + tokens[matchTokenPos].getToken().length(), messages + .getString("incorrect_case")); + ruleMatch.setSuggestedReplacement(Character.toUpperCase(firstChar) + + checkToken.substring(1)); + ruleMatches.add(ruleMatch); + } + } + return toRuleMatchArray(ruleMatches); + } + + private String dutchSpecialCase(final String firstToken, + final String secondToken, final AnalyzedTokenReadings[] tokens) { + if (language != Language.DUTCH) { + return null; + } + if (tokens.length >= 3 && firstToken.equals("'") + && secondToken.matches("k|m|n|r|s|t")) { + return tokens[3].getToken(); + } + return null; + } + + public void reset() { + } + +} |