summaryrefslogtreecommitdiffstats
path: root/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl
diff options
context:
space:
mode:
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl')
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java55
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java31
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java200
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java82
5 files changed, 410 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java
new file mode 100644
index 0000000..6d2ff17
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java
@@ -0,0 +1,55 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.AbstractCompoundRule;
+
+/**
+ * Checks that compounds (if in the list) are not written as separate words.
+ *
+ * @author Marcin Miłkowski, based on code by Daniel Naber
+ */
+public final class CompoundRule extends AbstractCompoundRule {
+
+  private static final String FILE_NAME = "/pl/compounds.txt";
+
+  /** Hands FILE_NAME to loadCompoundFile as UTF-8 and sets the Polish texts. */
+  public CompoundRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    loadCompoundFile(JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILE_NAME), "UTF-8");
+    super.setShort("Brak łącznika lub zbędny łącznik");
+    super.setMsg("Ten wyraz pisze się z łącznikiem.",
+        "Ten wyraz pisze się razem (bez spacji ani łącznika).",
+        "Ten wyraz pisze się z łącznikiem lub bez niego.");
+  }
+
+  @Override
+  public final String getId() {
+    return "PL_COMPOUNDS";
+  }
+
+  @Override
+  public final String getDescription() {
+    return "Sprawdza wyrazy z łącznikiem, np. „łapu capu” zamiast „łapu-capu”";
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java
new file mode 100644
index 0000000..0a6f01b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java
@@ -0,0 +1,31 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for Polish rules. It declares no members of its own
+ * and only extends {@link Rule}.
+ *
+ * @author Marcin Miłkowski
+ */
+public abstract class PolishRule extends Rule {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java
new file mode 100644
index 0000000..3b83133
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.pl;
+
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.GenericUnpairedBracketsRule;
+
+/** Unpaired-brackets rule that uses the Polish bracket and quote symbols. */
+public class PolishUnpairedBracketsRule extends GenericUnpairedBracketsRule {
+  private static final String[] PL_START_SYMBOLS = { "[", "(", "{", "„", "»", "\"" };
+  private static final String[] PL_END_SYMBOLS = { "]", ")", "}", "”", "«", "\"" };
+
+  public PolishUnpairedBracketsRule(final ResourceBundle messages, final Language language) {
+    super(messages, language);
+    startSymbols = PL_START_SYMBOLS;
+    endSymbols = PL_END_SYMBOLS;
+  }
+
+  @Override
+  public String getId() {
+    return "PL_UNPAIRED_BRACKETS";
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java
new file mode 100644
index 0000000..a7dbb5e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java
@@ -0,0 +1,200 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Rule for detecting the same word used more than once in a sentence, and not
+ * just in a row. Tokens are compared by lemma when every reading of the token
+ * has a POS tag and a lemma, and by their surface form otherwise.
+ * <p>
+ * The rule is switched off by default (see the constructor).
+ *
+ * @author Marcin Miłkowski
+ */
+public class PolishWordRepeatRule extends PolishRule {
+
+  /**
+   * Excluded dictionary words.
+   */
+  private static final Pattern EXC_WORDS = Pattern
+      .compile("nie|tuż|aż|to|siebie|być|ani|ni|albo|"
+          + "lub|czy|bądź|jako|zł|np|coraz"
+          + "|bardzo|bardziej|proc|ten|jak|mln|tys|swój|mój|"
+          + "twój|nasz|wasz|i|zbyt");
+
+  /**
+   * Excluded part of speech classes.
+   */
+  private static final Pattern EXC_POS = Pattern.compile("prep:.*|ppron.*");
+
+  /**
+   * Excluded non-words (special symbols, Roman numerals etc.).
+   */
+  private static final Pattern EXC_NONWORDS = Pattern
+      .compile("&quot|&gt|&lt|&amp|[0-9].*|"
+          + "M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])$");
+
+  /**
+   * Creates the rule, which is switched off by default.
+   *
+   * @param messages
+   *          bundle from which the "category_misc" category name is read;
+   *          when it is null, no category is set
+   */
+  public PolishWordRepeatRule(final ResourceBundle messages) {
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+    setDefaultOff();
+  }
+
+  /*
+   * (non-Javadoc)
+   *
+   * @see de.danielnaber.languagetool.rules.Rule#getId()
+   */
+  @Override
+  public final String getId() {
+    return "PL_WORD_REPEAT";
+  }
+
+  /*
+   * (non-Javadoc)
+   *
+   * @see de.danielnaber.languagetool.rules.Rule#getDescription()
+   */
+  @Override
+  public final String getDescription() {
+    return "Powtórzenia wyrazów w zdaniu (monotonia stylistyczna)";
+  }
+
+  /**
+   * Tests if any word is repeated in the sentence.
+   * <p>
+   * Tokens shorter than two characters, tokens matching
+   * {@link #EXC_NONWORDS} and tokens for which the scan of the readings
+   * finds an empty POS tag, an excluded lemma or an excluded POS tag are
+   * skipped. Every other token is compared with the words collected from
+   * the earlier tokens of the sentence.
+   *
+   * @param text
+   *          the analyzed sentence to check
+   * @return the matches found, one for each token that repeats an earlier word
+   */
+  @Override
+  public final RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    // lemmas or surface forms collected from the earlier tokens
+    final TreeSet<String> seenWords = new TreeSet<String>();
+
+    // start from real token, 0 = SENT_START
+    for (int i = 1; i < tokens.length; i++) {
+      final String token = tokens[i].getToken();
+      final int readingsLen = tokens[i].getReadingsLength();
+      // avoid "..." etc. to be matched:
+      boolean isWord = token.length() >= 2;
+      // cleared as soon as a reading without POS tag or lemma is found
+      boolean hasLemma = true;
+
+      for (int k = 0; k < readingsLen; k++) {
+        final String posTag = tokens[i].getAnalyzedToken(k).getPOSTag();
+        if (posTag == null) {
+          hasLemma = false;
+          continue;
+        }
+        if (StringTools.isEmpty(posTag)) {
+          isWord = false;
+          break;
+        }
+        final String lemma = tokens[i].getAnalyzedToken(k).getLemma();
+        if (lemma == null) {
+          hasLemma = false;
+          break;
+        }
+        if (EXC_WORDS.matcher(lemma).matches()
+            || EXC_POS.matcher(posTag).matches()) {
+          isWord = false;
+          break;
+        }
+      }
+
+      if (EXC_NONWORDS.matcher(token).matches()) {
+        isWord = false;
+      }
+      if (!isWord) {
+        continue;
+      }
+
+      // Look the token up among the words of the EARLIER tokens only and
+      // store its own words afterwards. Adding them while the readings are
+      // still being scanned made a token with several readings count as a
+      // repetition of itself, e.g. one with the lemmas A, B, A or one without
+      // lemmas but with more than one reading before SENT_END.
+      final TreeSet<String> ownWords = new TreeSet<String>();
+      boolean repetition = false;
+      boolean sentEndSeen = false;
+      for (int j = 0; j < readingsLen; j++) {
+        sentEndSeen |= "SENT_END".equals(tokens[i].getAnalyzedToken(j).getPOSTag());
+        final String word = hasLemma ? tokens[i].getAnalyzedToken(j).getLemma() : token;
+        // readings from the SENT_END one onwards are never compared
+        if (!sentEndSeen) {
+          repetition |= seenWords.contains(word);
+        }
+        // a lemma is stored only before SENT_END, a surface form always
+        if (!sentEndSeen || !hasLemma) {
+          ownWords.add(word);
+        }
+      }
+      seenWords.addAll(ownWords);
+
+      if (repetition) {
+        final String msg = "Powtórzony wyraz w zdaniu";
+        final int pos = tokens[i].getStartPos();
+        final RuleMatch ruleMatch = new RuleMatch(this, pos, pos
+            + token.length(), msg, "Powtórzenie wyrazu");
+        ruleMatches.add(ruleMatch);
+      }
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /*
+   * (non-Javadoc)
+   *
+   * @see de.danielnaber.languagetool.rules.Rule#reset()
+   */
+  @Override
+  public void reset() {
+    // nothing to reset: match() keeps no state between calls
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java
new file mode 100644
index 0000000..90708d9
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java
@@ -0,0 +1,82 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.rules.AbstractSimpleReplaceRule;
+
+/**
+ * A rule that matches words or phrases which should not be used and suggests
+ * correct ones instead.
+ *
+ * Polish implementation. The list of words is the file named by
+ * {@link #getFileName()}, i.e. <code>/pl/replace.txt</code>.
+ *
+ * @author Marcin Miłkowski
+ */
+public class SimpleReplaceRule extends AbstractSimpleReplaceRule {
+
+  public static final String POLISH_SIMPLE_REPLACE_RULE = "PL_SIMPLE_REPLACE";
+
+  private static final String FILE_NAME = "/pl/replace.txt";
+  // locale used on case-conversion
+  private static final Locale PL_LOCALE = new Locale("pl");
+
+  public SimpleReplaceRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+  }
+
+  /** Returns the name of the file holding the Polish list of words. */
+  public final String getFileName() {
+    return FILE_NAME;
+  }
+
+  @Override
+  public final String getId() {
+    return POLISH_SIMPLE_REPLACE_RULE;
+  }
+
+  @Override
+  public String getDescription() {
+    return "Typowe literówki";
+  }
+
+  /** Returns the short Polish text "Literówka" ("typo"). */
+  public String getShort() {
+    return "Literówka";
+  }
+
+  /** Returns the Polish text " is a typical typo, correct form: ". */
+  public String getSuggestion() {
+    return " to typowa literówka, poprawnie: ";
+  }
+
+  /** Use case-insensitive matching. */
+  public boolean isCaseSensitive() {
+    return false;
+  }
+
+  /** Locale used on case-conversion. */
+  public Locale getLocale() {
+    return PL_LOCALE;
+  }
+}