summaryrefslogtreecommitdiffstats
path: root/JLanguageTool/src/test
diff options
context:
space:
mode:
Diffstat (limited to 'JLanguageTool/src/test')
-rw-r--r--JLanguageTool/src/test/.cvsignore1
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/AbstractSecurityTestCase.java81
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenReadingsTest.java58
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenTest.java36
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/JLanguageToolTest.java238
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/MainTest.java386
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/TestTools.java233
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/TranslationTest.java117
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/ValidateXMLTest.java72
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/VersionNumberTest.java57
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/XMLValidator.java158
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/bitext/TabBitextReaderTest.java59
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/bitext/WordFastTMReaderTest.java56
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/openoffice/MainTest.java37
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/CommaWhitespaceRuleTest.java102
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/CompoundRuleTestAbs.java78
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/DoublePunctuationRuleTest.java55
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRuleTest.java159
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/UppercaseSentenceStartRuleTest.java98
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/WhitespaceRuleTest.java75
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/WordRepeatRuleTest.java81
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/BitextPatternRuleTest.java288
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/DifferentLengthRuleTest.java56
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/FalseFriendsAsBitextLoaderTest.java91
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/SameTranslationRuleTest.java57
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRuleTest.java80
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRuleTest.java80
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/AgreementRuleTest.java190
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CaseRuleTest.java116
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CompoundRuleTest.java88
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/DashRuleTest.java53
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WiederVsWiderRuleTest.java49
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WordCoherencyRuleTest.java87
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/AvsAnRuleTest.java167
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/CompoundRuleTest.java46
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRuleTest.java151
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/es/ElwithFemRuleTest.java74
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRuleTest.java75
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/AvsAnRuleTest.java167
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/CompoundRuleTest.java46
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/EnglishUnpairedBracketsRuleTest.java151
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/ElementTest.java43
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleTest.java87
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java502
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/TestUnifier.java283
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/CompoundRuleTest.java46
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRuleTest.java56
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRuleTest.java51
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/SimpleReplaceRuleTest.java80
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/CompoundRuleTest.java53
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/RomanianPatternRuleTest.java55
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/SimpleReplaceRuleTest.java153
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRuleTest.java55
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianCompoundRuleTest.java62
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRuleTest.java56
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/PunctuationCheckRuleTest.java72
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/SimpleReplaceRuleTest.java49
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/server/HTTPServerTest.java117
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizerTest.java51
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizerTest.java46
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizerTest.java46
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizerTest.java49
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizerTest.java83
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizerTest.java43
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ManualTaggerTest.java48
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ca/CatalanTaggerTest.java60
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/cs/CzechTaggerTest.java59
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/de/GermanTaggerTest.java117
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunkerTest.java62
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleTest.java236
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguatorTest.java70
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java81
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguatorTest.java89
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/en/EnglishTaggerTest.java90
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/eo/EsperantoTaggerTest.java45
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/es/SpanishTaggerTest.java59
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/fr/FrenchTaggerTest.java62
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/gl/GalicianTaggerTest.java60
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/it/ItalianTaggerTest.java60
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/nl/DutchTaggerTest.java58
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/pl/PolishTaggerTest.java60
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerDiacriticsTest.java97
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTest.java105
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTestAbs.java147
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ru/RussianTaggerTest.java59
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sk/SlovakTaggerTest.java58
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sv/SwedishTaggerTest.java60
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/test-en.txt1
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/RussianSRXSentenceTokenizerTest.java120
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizerTest.java108
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SentenceTokenizerTest.java107
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/WordTokenizerTest.java38
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizerTest.java118
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/da/DanishSRXSentenceTokenizerTest.java82
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSRXSentenceTokenizerTest.java108
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizerTest.java100
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchSRXSentenceTokenizerTest.java83
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizerTest.java38
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/pl/PolishSentenceTokenizerTest.java152
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianSentenceTokenizerTest.java157
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizerTest.java122
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/sk/SlovakSentenceTokenizerTest.java143
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tools/StringToolsTest.java263
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tools/ToolsTest.java105
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/tools/UnsyncStackTest.java39
-rw-r--r--JLanguageTool/src/test/testinput.txt3
106 files changed, 10216 insertions, 0 deletions
diff --git a/JLanguageTool/src/test/.cvsignore b/JLanguageTool/src/test/.cvsignore
new file mode 100644
index 0000000..fbb76d7
--- /dev/null
+++ b/JLanguageTool/src/test/.cvsignore
@@ -0,0 +1 @@
+*.probescript
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/AbstractSecurityTestCase.java b/JLanguageTool/src/test/de/danielnaber/languagetool/AbstractSecurityTestCase.java
new file mode 100644
index 0000000..5d0661d
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/AbstractSecurityTestCase.java
@@ -0,0 +1,81 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+
+package de.danielnaber.languagetool;
+
+import junit.framework.TestCase;
+import java.security.Permission;
+
+/**
+ * Base class for tests of code that may call {@link System#exit(int)}.
+ *
+ * <p>For the duration of each test a permissive {@link SecurityManager} is
+ * installed that allows every operation but turns any attempt to exit the JVM
+ * into an {@link ExitException}. A test can catch that exception and inspect
+ * the requested exit status instead of having the whole test run terminated.
+ *
+ * @author Charlie Collins (Maven Test Example from
+ * http://www.screaming-penguin.com/node/7570)
+ */
+public class AbstractSecurityTestCase extends TestCase {
+
+  /** Security manager that was active before {@link #setUp()}; may be {@code null}. */
+  private SecurityManager previousSecurityManager;
+
+  public AbstractSecurityTestCase(String name) {
+    super(name);
+  }
+
+  /** Thrown in place of an actual JVM exit while a test is running. */
+  protected static class ExitException extends SecurityException {
+    private static final long serialVersionUID = 1L;
+    /** Status code that was passed to the intercepted exit call. */
+    public final int status;
+    public ExitException(int status) {
+      super("There is no escape!");
+      this.status = status;
+    }
+  }
+
+  /** Security manager that permits everything except leaving the JVM. */
+  private static class NoExitSecurityManager extends SecurityManager {
+    @Override
+    public void checkPermission(@SuppressWarnings("unused") Permission perm) {
+      // allow anything.
+    }
+
+    @Override
+    @SuppressWarnings("unused")
+    public void checkPermission(Permission perm, Object context) {
+      // allow anything.
+    }
+
+    @Override
+    public void checkExit(int status) {
+      super.checkExit(status);
+      // never let the exit go through; report the status to the test instead
+      throw new ExitException(status);
+    }
+  }
+
+  /** Remembers the current security manager and installs the exit-blocking one. */
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    previousSecurityManager = System.getSecurityManager();
+    System.setSecurityManager(new NoExitSecurityManager());
+  }
+
+  /**
+   * Restores whatever security manager was active before {@link #setUp()},
+   * rather than unconditionally clearing it, so an environment that runs the
+   * tests under its own manager is left as it was found.
+   */
+  @Override
+  protected void tearDown() throws Exception {
+    System.setSecurityManager(previousSecurityManager);
+    super.tearDown();
+  }
+
+  //get rid of JUnit warning for this helper class
+  public void testSomething() {
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenReadingsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenReadingsTest.java
new file mode 100644
index 0000000..4bd4ce1
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenReadingsTest.java
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool;
+
+import junit.framework.TestCase;
+
+/**
+ * Checks the sentence/paragraph boundary flags and the token equality
+ * exposed through {@link AnalyzedTokenReadings}.
+ */
+public class AnalyzedTokenReadingsTest extends TestCase {
+
+  public void testNewTags() {
+    // a freshly created reading carries none of the boundary flags
+    AnalyzedTokenReadings readings = new AnalyzedTokenReadings(new AnalyzedToken("word", "POS", "lemma"));
+    assertFalse(readings.isLinebreak());
+    assertFalse(readings.isSentEnd());
+    assertFalse(readings.isParaEnd());
+    assertFalse(readings.isSentStart());
+    readings.setSentEnd();
+    assertFalse(readings.isSentStart());
+    assertTrue(readings.isSentEnd());
+
+    // SENT_END or PARA_END can also be added directly via addReading()
+    // instead of through a setter, which is possible e.g. in the rule
+    // disambiguator
+    readings = new AnalyzedTokenReadings(new AnalyzedToken("word", null, "lemma"));
+    readings.addReading(new AnalyzedToken("word", "SENT_END", null));
+    assertTrue(readings.isSentEnd());
+    assertFalse(readings.isParaEnd());
+    readings.addReading(new AnalyzedToken("word", "PARA_END", null));
+    assertTrue(readings.isParaEnd());
+    assertFalse(readings.isSentStart());
+
+    // but you can't add SENT_START to a non-empty token
+    // and get isSentStart == true
+    readings.addReading(new AnalyzedToken("word", "SENT_START", null));
+    assertFalse(readings.isSentStart());
+
+    // the whitespace-before flag takes part in token equality
+    final AnalyzedToken withWhitespace = new AnalyzedToken("word", "POS", "lemma");
+    withWhitespace.setWhitespaceBefore(true);
+    readings = new AnalyzedTokenReadings(withWhitespace);
+    assertEquals(withWhitespace, readings.getAnalyzedToken(0));
+    final AnalyzedToken withoutWhitespace = new AnalyzedToken("word", "POS", "lemma");
+    assertFalse(withoutWhitespace.equals(readings.getAnalyzedToken(0)));
+    final AnalyzedToken alsoWithWhitespace = new AnalyzedToken("word", "POS", "lemma");
+    alsoWithWhitespace.setWhitespaceBefore(true);
+    assertEquals(alsoWithWhitespace, readings.getAnalyzedToken(0));
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenTest.java
new file mode 100644
index 0000000..66f86a5
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenTest.java
@@ -0,0 +1,36 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool;
+
+import junit.framework.TestCase;
+
+/** Checks the string form and the accessors of {@link AnalyzedToken}. */
+public class AnalyzedTokenTest extends TestCase {
+
+  public void testToString() {
+    // with a lemma, toString() is expected to show lemma/POS
+    final AnalyzedToken withLemma = new AnalyzedToken("word", "POS", "lemma");
+    assertEquals("lemma/POS", withLemma.toString());
+    assertEquals("lemma", withLemma.getLemma());
+
+    // without a lemma, toString() is expected to show the token text instead
+    final AnalyzedToken withoutLemma = new AnalyzedToken("word", "POS", null);
+    assertEquals("word/POS", withoutLemma.toString());
+    assertNull(withoutLemma.getLemma());
+    assertEquals("word", withoutLemma.getToken());
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/JLanguageToolTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/JLanguageToolTest.java
new file mode 100644
index 0000000..80afa8a
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/JLanguageToolTest.java
@@ -0,0 +1,238 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool.paragraphHandling;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+
+/**
+ * @author Daniel Naber
+ */
+public class JLanguageToolTest extends TestCase {
+
+ // used on http://www.languagetool.org/usage/
+ /*
+ public void testDemo() throws IOException {
+ JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+ langTool.activateDefaultPatternRules();
+ List<RuleMatch> matches = langTool.check("A sentence " +
+ "with a error in the Hitchhiker's Guide tot he Galaxy");
+ for (RuleMatch match : matches) {
+ System.out.println("Potential error at line " +
+ match.getEndLine() + ", column " +
+ match.getColumn() + ": " + match.getMessage());
+ System.out.println("Suggested correction: " +
+ match.getSuggestedReplacements());
+ }
+ }
+ */
+
+
+ public void testEnglish() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.ENGLISH);
+ List<RuleMatch> matches = tool.check("A test that should not give errors.");
+ assertEquals(0, matches.size());
+ matches = tool.check("A test test that should give errors.");
+ assertEquals(1, matches.size());
+ matches = tool.check("I can give you more a detailed description.");
+ assertEquals(0, matches.size());
+ assertEquals(8, tool.getAllRules().size());
+ final List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir()
+ + "/en/grammar.xml");
+ for (PatternRule patternRule : rules) {
+ tool.addRule(patternRule);
+ }
+ assertTrue(tool.getAllRules().size() > 3);
+ matches = tool.check("I can give you more a detailed description.");
+ assertEquals(1, matches.size());
+ tool.disableRule("MORE_A_JJ");
+ matches = tool.check("I can give you more a detailed description.");
+ assertEquals(0, matches.size());
+ tool.disableCategory("Possible Typos");
+ matches = tool.check("I've go to go.");
+ assertEquals(0, matches.size());
+ }
+
+ public void testGerman() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.GERMAN);
+ List<RuleMatch> matches = tool.check("Ein Test, der keine Fehler geben sollte.");
+ assertEquals(0, matches.size());
+ matches = tool.check("Ein Test Test, der Fehler geben sollte.");
+ assertEquals(1, matches.size());
+ final List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir()
+ + "/de/grammar.xml");
+ for (PatternRule patternRule : rules) {
+ tool.addRule(patternRule);
+ }
+ tool.setListUnknownWords(true);
+ // German rule has no effect with English error:
+ matches = tool.check("I can give you more a detailed description");
+ assertEquals(0, matches.size());
+ //test unknown words listing
+ assertEquals("[I, can, detailed, give, more, you]", tool.getUnknownWords().toString());
+ }
+
+ public void testDutch() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.DUTCH);
+ final List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir()
+ + "/nl/grammar.xml");
+ for (PatternRule patternRule : rules) {
+ tool.addRule(patternRule);
+ }
+ List<RuleMatch> matches = tool.check("Een test, die geen fouten mag geven.");
+ assertEquals(0, matches.size());
+ matches = tool.check("Een test test, die een fout moet geven.");
+ assertEquals(1, matches.size());
+ //test uppercasing rule:
+ /*
+ matches = tool.check("De Afdeling Beheer kan het");
+ assertEquals(1, matches.size());
+ assertEquals("Als Afdeling geen deel uitmaakt van de naam, dan is juist:<suggestion>afdeling</suggestion>", matches.get(0).getMessage());
+ */
+ // Dutch rule has no effect with English error:
+ matches = tool.check("I can give you more a detailed description");
+ assertEquals(0, matches.size());
+ }
+
+ public void testPolish() throws IOException {
+ JLanguageTool tool = new JLanguageTool(Language.POLISH);
+ assertEquals("[PL]", Arrays.toString(Language.POLISH.getCountryVariants()));
+ List<RuleMatch> matches = tool.check("To jest całkowicie prawidłowe zdanie.");
+ assertEquals(0, matches.size());
+ matches = tool.check("To jest jest problem.");
+ assertEquals(1, matches.size());
+ //this rule is by default off
+ matches = tool.check("Był on bowiem pięknym strzelcem bowiem.");
+ assertEquals(0, matches.size());
+ tool.enableDefaultOffRule("PL_WORD_REPEAT");
+ matches = tool.check("Był on bowiem pięknym strzelcem bowiem.");
+ assertEquals(1, matches.size());
+ List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir()
+ + "/pl/grammar.xml");
+ for (final PatternRule rule : rules) {
+ tool.addRule(rule);
+ }
+ matches = tool.check("Premier drapie się w ucho co i rusz.");
+ assertEquals(1, matches.size());
+ // Polish rule has no effect with English error:
+ matches = tool.check("I can give you more a detailed description");
+ assertEquals(0, matches.size());
+ tool.setListUnknownWords(true);
+ matches = tool.check("This is not a Polish text.");
+ assertEquals("[Polish, This, is]", tool.getUnknownWords().toString());
+ //check positions relative to sentence ends
+ matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie ma przecinka.");
+ assertEquals(16, matches.get(0).getColumn());
+ //with a space...
+ matches = tool.check("To jest tekst. \nTest 1. To jest linia w której nie ma przecinka.");
+ assertEquals(16, matches.get(0).getColumn());
+ matches = tool.check("To jest tekst. Test 1. To jest linia w której nie ma przecinka.");
+ assertEquals(30, matches.get(0).getColumn());
+ //recheck with the -b mode...
+ final Language lang = Language.POLISH;
+ lang.getSentenceTokenizer().setSingleLineBreaksMarksParagraph(
+ true);
+ tool = new JLanguageTool(lang);
+ rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir()
+ + "/pl/grammar.xml");
+ for (final PatternRule rule : rules) {
+ tool.addRule(rule);
+ }
+ matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie ma przecinka.");
+ assertEquals(16, matches.get(0).getColumn());
+ //with a space...
+ matches = tool.check("To jest tekst. \nTest 1. To jest linia w której nie ma przecinka.");
+ assertEquals(16, matches.get(0).getColumn());
+ matches = tool.check("To jest tekst. To jest linia w której nie ma przecinka.");
+ assertEquals(23, matches.get(0).getColumn());
+
+ }
+
+ public void testSlovenian() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.SLOVENIAN);
+ List<RuleMatch> matches = tool.check("Kupil je npr. jajca, moko in mleko.");
+ assertEquals(0, matches.size());
+ }
+
+ public void testCountLines() {
+ assertEquals(0, JLanguageTool.countLineBreaks(""));
+ assertEquals(1, JLanguageTool.countLineBreaks("Hallo,\nnächste Zeile"));
+ assertEquals(2, JLanguageTool.countLineBreaks("\nZweite\nDritte"));
+ assertEquals(4, JLanguageTool.countLineBreaks("\nZweite\nDritte\n\n"));
+ }
+
+
+ public void testAnalyzedSentence() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.ENGLISH);
+ //test soft-hyphen ignoring:
+ assertEquals("<S> This[this/DT] is[be/VBZ] a[a/DT] test­ed[tested/JJ,test/VBD,test/VBN,test­ed] sentence[sentence/NN,sentence/VB,sentence/VBP].[./.,</S>]", tool.getAnalyzedSentence("This is a test\u00aded sentence.").toString());
+ //test paragraph ends adding
+ assertEquals("<S> </S><P/> ", tool.getAnalyzedSentence("\n").toString());
+ }
+
+ public void testParaRules() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.ENGLISH);
+
+ //run normally
+ List<RuleMatch> matches = tool.check("(This is an quote.\n It ends in the second sentence.");
+ assertEquals(2, matches.size());
+ assertEquals(2, tool.getSentenceCount());
+
+ //run in a sentence-only mode
+ matches = tool.check("(This is an quote.\n It ends in the second sentence.", false, paragraphHandling.ONLYNONPARA);
+ assertEquals(1, matches.size());
+ assertEquals("EN_A_VS_AN", matches.get(0).getRule().getId());
+ assertEquals(1, tool.getSentenceCount());
+
+ //run in a paragraph mode - single sentence
+ matches = tool.check("(This is an quote.\n It ends in the second sentence.", false, paragraphHandling.ONLYPARA);
+ assertEquals(1, matches.size());
+ assertEquals("EN_UNPAIRED_BRACKETS", matches.get(0).getRule().getId());
+ assertEquals(1, tool.getSentenceCount());
+
+ //run in a paragraph mode - many sentences
+ matches = tool.check("(This is an quote.\n It ends in the second sentence.", true, paragraphHandling.ONLYPARA);
+ assertEquals(1, matches.size());
+ assertEquals("EN_UNPAIRED_BRACKETS", matches.get(0).getRule().getId());
+ assertEquals(2, tool.getSentenceCount());
+ }
+
+ public void testWhitespace() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.ENGLISH);
+ final AnalyzedSentence raw = tool.getRawAnalyzedSentence("Let's do a \"test\", do you understand?");
+ final AnalyzedSentence cooked = tool.getAnalyzedSentence("Let's do a \"test\", do you understand?");
+ //test if there was a change
+ assertFalse(raw.equals(cooked));
+ //see if nothing has been deleted
+ assertEquals(raw.getTokens().length, cooked.getTokens().length);
+ int i = 0;
+ for (final AnalyzedTokenReadings atr : raw.getTokens()) {
+ assertEquals(atr.isWhitespaceBefore(),
+ cooked.getTokens()[i].isWhitespaceBefore());
+ i++;
+ }
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/MainTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/MainTest.java
new file mode 100644
index 0000000..b7c35ad
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/MainTest.java
@@ -0,0 +1,386 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool;
+
+import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+
+import java.io.PrintStream;
+import java.net.URISyntaxException;
+import java.net.URI;
+import java.net.URL;
+
+import javax.xml.parsers.ParserConfigurationException;
+import org.xml.sax.SAXException;
+
+/**
+ * Tests the basic features of the command-line interface.
+ *
+ * @author Marcin Miłkowski
+ */
+public class MainTest extends AbstractSecurityTestCase {
+
+  private static final String ENGLISH_TEST_FILE = "test-en.txt";
+
+  // Output fragments that several tests look for.
+  private static final String LANG_ENGLISH = "Expected text language: English";
+  private static final String LANG_POLISH = "Expected text language: Polish";
+  private static final String FIRST_MATCH_A_VS_AN = "1.) Line 1, column 9, Rule ID: EN_A_VS_AN";
+  private static final String TAGGED_ENGLISH = "<S> This[this/DT] is[be/VBZ] an[a/DT] test[test/NN].[./.,</S>]";
+  // Hint message the bitext tests look for together with the ACTUAL rule ID.
+  private static final String ACTUAL_HINT =
+      "Message: Hint: \"aktualny\" (Polish) means \"current\", \"(the) latest\", \"up-to-date\" (English). Did you mean 'rzeczywisty'?";
+
+  /** Buffers that capture everything written to System.out / System.err during a test. */
+  private ByteArrayOutputStream out;
+  private ByteArrayOutputStream err;
+  /** The real standard streams, saved in setUp() and put back in tearDown(). */
+  private PrintStream stdout;
+  private PrintStream stderr;
+  private java.io.InputStream stdin;
+
+  public MainTest(String testName) {
+    super(testName);
+  }
+
+  /**
+   * Remembers the real standard streams and redirects System.out and
+   * System.err into in-memory buffers.
+   */
+  public void setUp() throws Exception {
+    super.setUp();
+    this.stdout = System.out;
+    this.stderr = System.err;
+    this.stdin = System.in;
+    this.out = new ByteArrayOutputStream();
+    this.err = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(this.out));
+    System.setErr(new PrintStream(this.err));
+  }
+
+  /**
+   * Restores the standard streams. This happens in a finally block so that a
+   * failure in super.tearDown() cannot leave the redirected streams in place
+   * for the tests that run afterwards. System.in is restored as well because
+   * several tests replace it.
+   */
+  public void tearDown() throws Exception {
+    try {
+      super.tearDown();
+    } finally {
+      System.setOut(this.stdout);
+      System.setErr(this.stderr);
+      System.setIn(this.stdin);
+    }
+  }
+
+  /** "-h" must print the usage message and exit with status 1. */
+  public void testUsageMessage() throws IOException, ParserConfigurationException, SAXException {
+    try {
+      String[] args = new String[] {"-h"};
+      Main.main(args);
+      // message fixed: the status asserted below is 1, not 0
+      fail("LT should have exited with status 1!");
+    }
+    catch (ExitException e) {
+      String output = capturedOut();
+      assertContains(output, "Usage: java de.danielnaber.languagetool.Main [-r|--recursive] [-v|--verbose");
+      assertEquals("Exit status", 1, e.status);
+    }
+  }
+
+  public void testEnglishFile() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "en", englishTestFilePath());
+    assertStartsWith(output, LANG_ENGLISH);
+    assertContains(output, FIRST_MATCH_A_VS_AN);
+  }
+
+  /** With "-v" the tagged sentence is additionally expected on standard error. */
+  public void testEnglishFileVerbose() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "en", "-v", englishTestFilePath());
+    assertStartsWith(output, LANG_ENGLISH);
+    assertContains(output, FIRST_MATCH_A_VS_AN);
+    assertContains(capturedErr(), TAGGED_ENGLISH);
+  }
+
+  /** With "--apply" only the corrected text is printed. */
+  public void testEnglishFileApplySuggestions() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "en", "--apply", englishTestFilePath());
+    assertEquals("This is a test.\n", output);
+  }
+
+  /** No file argument at all: the text is read from standard input. */
+  public void testEnglishStdIn1() throws IOException, ParserConfigurationException, SAXException {
+    feedStdIn("This is an test.");
+    String output = runMain("-l", "en");
+    assertStartsWith(output, LANG_ENGLISH);
+    assertContains(output, FIRST_MATCH_A_VS_AN);
+  }
+
+  /** "-" as the file argument: the text is read from standard input. */
+  public void testEnglishStdIn2() throws IOException, ParserConfigurationException, SAXException {
+    feedStdIn("This is an test.");
+    String output = runMain("-l", "en", "-");
+    assertStartsWith(output, LANG_ENGLISH);
+    assertContains(output, FIRST_MATCH_A_VS_AN);
+  }
+
+  /** "-a" on standard input gives the same corrected text as "--apply" on the file. */
+  public void testEnglishStdIn3() throws IOException, ParserConfigurationException, SAXException {
+    feedStdIn("This is an test.");
+    String output = runMain("-l", "en", "-a", "-");
+    assertEquals("This is a test.\n", output);
+  }
+
+  //test line mode vs. para mode
+  //first line mode: with "-b" the second line gets capitalized
+  public void testEnglishLineMode() throws IOException, ParserConfigurationException, SAXException {
+    feedStdIn("This is what I mean\nand you know it.");
+    String output = runMain("-l", "en", "-a", "-b", "-");
+    assertEquals("This is what I mean\nAnd you know it.\n", output);
+  }
+
+  //then para mode: without "-b" the second line is left unchanged
+  public void testEnglishParaMode() throws IOException, ParserConfigurationException, SAXException {
+    feedStdIn("This is what I mean\nand you know it.");
+    String output = runMain("-l", "en", "-a", "-");
+    assertEquals("This is what I mean\nand you know it.\n", output);
+  }
+
+  /** PL_WORD_REPEAT is switched on explicitly with "-e". */
+  public void testPolishStdInDefaultOff() throws IOException, ParserConfigurationException, SAXException {
+    feedStdIn("To jest test, który zrobiłem, który mi się podoba.");
+    String output = runMain("-l", "pl", "-e", "PL_WORD_REPEAT", "-");
+    assertStartsWith(output, LANG_POLISH);
+    assertContains(output, "Working on STDIN...");
+    assertContains(output, "1.) Line 1, column 31, Rule ID: PL_WORD_REPEAT");
+  }
+
+  public void testEnglishFileRuleDisabled() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "en", "-d", "EN_A_VS_AN", englishTestFilePath());
+    assertStartsWith(output, LANG_ENGLISH);
+    assertNotContains(output, "Rule ID: EN_A_VS_AN");
+  }
+
+  public void testEnglishFileRuleEnabled() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "en", "-e", "EN_A_VS_AN", englishTestFilePath());
+    assertStartsWith(output, LANG_ENGLISH);
+    assertContains(output, "Rule ID: EN_A_VS_AN");
+  }
+
+  /** With "--api" the result is expected as an XML document. */
+  public void testEnglishFileAPI() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "en", "--api", englishTestFilePath());
+    assertStartsWith(output, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+    assertContains(output, "<error fromy=\"0\" fromx=\"8\" toy=\"0\" tox=\"11\" ruleId=\"EN_A_VS_AN\" msg=\"Use 'a' instead of 'an' if the following word doesn't start with a vowel sound, e.g. 'a sentence', 'a university'\" replacements=\"a\" context=\"This is an test. \" contextoffset=\"8\" errorlength=\"2\"/>");
+  }
+
+  public void testPolishFileAPI() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    File input = createInputFile("To jest świnia która się ślini.");
+
+    String output = runMain("-l", "pl", "--api", input.getAbsolutePath());
+    assertStartsWith(output, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+    assertContains(output, "<error fromy=\"0\" fromx=\"8\" toy=\"0\" tox=\"21\" ruleId=\"BRAK_PRZECINKA_KTORY\" subId=\"5\"");
+    //This tests whether XML encoding is actually UTF-8:
+    assertContains(output, "msg=\"Brak przecinka w tym fragmencie zdania. Przecinek prawdopodobnie należy postawić tak: 'świnia, która'.\" replacements=\"świnia, która\" ");
+    assertContains(output, "context=\"To jest świnia która się ślini. \" contextoffset=\"8\" errorlength=\"12\"/>");
+  }
+
+  /** The match is expected to be reported for line 8 of the input file. */
+  public void testPolishLineNumbers() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    File input = createInputFile(
+        "Test.",
+        "Test.",
+        "Test.",
+        "Test.",
+        "Test.",
+        "Test.",
+        "",
+        "Test który wykaże błąd.");
+
+    String output = runMain("-l", "pl", input.getAbsolutePath());
+    assertStartsWith(output, LANG_POLISH);
+    assertContains(output, "Line 8, column 1, Rule ID: BRAK_PRZECINKA_KTORY");
+  }
+
+  public void testEnglishTagger() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "en", "--taggeronly", englishTestFilePath());
+    assertStartsWith(output, LANG_ENGLISH);
+    assertContains(output, TAGGED_ENGLISH);
+  }
+
+  public void testBitextMode() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    File input = createInputFile(
+        "This is not actual.\tTo nie jest aktualne.",
+        "Test\tTest",
+        "ab\tVery strange data indeed, much longer than input");
+
+    String output = runMain("-l", "pl", "--bitext", "-m", "en", input.getAbsolutePath());
+    assertStartsWith(output, LANG_POLISH);
+    assertContains(output, ACTUAL_HINT);
+    assertContains(output, "Line 1, column 32, Rule ID: ACTUAL");
+    assertContains(output, "Line 3, column 4, Rule ID: TRANSLATION_LENGTH");
+  }
+
+  public void testBitextModeWithDisabledRule() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    File input = createInputFile(
+        "this is not actual.\tTo nie jest aktualne.",
+        "test\tTest",
+        "ab\tVery strange data indeed, much longer than input");
+
+    String output = runMain("-l", "pl", "--bitext", "-m", "en", "-d", "UPPERCASE_SENTENCE_START,TRANSLATION_LENGTH", input.getAbsolutePath());
+    assertStartsWith(output, LANG_POLISH);
+    assertContains(output, ACTUAL_HINT);
+    assertContains(output, "Line 1, column 32, Rule ID: ACTUAL");
+    assertNotContains(output, "Rule ID: TRANSLATION_LENGTH");
+  }
+
+  /** With "-e TRANSLATION_LENGTH" the ACTUAL match is expected to be absent. */
+  public void testBitextModeWithEnabledRule() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    File input = createInputFile(
+        "this is not actual.\tTo nie jest aktualne.",
+        "test\tTest",
+        "ab\tVery strange data indeed, much longer than input");
+
+    String output = runMain("-l", "pl", "--bitext", "-m", "en", "-e", "TRANSLATION_LENGTH", input.getAbsolutePath());
+    assertStartsWith(output, LANG_POLISH);
+    assertNotContains(output, ACTUAL_HINT);
+    assertNotContains(output, "Line 1, column 32, Rule ID: ACTUAL");
+    assertContains(output, "Rule ID: TRANSLATION_LENGTH");
+  }
+
+  public void testBitextModeApply() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    File input = createInputFile("There is a dog.\tNie ma psa.");
+
+    String output = runMain("-l", "pl", "--bitext", "-m", "en", "--apply", input.getAbsolutePath());
+    assertStartsWith(output, "Istnieje psa.");
+  }
+
+  /** The English file is checked as Polish with "-u": unknown words get listed. */
+  public void testListUnknown() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "pl", "-u", englishTestFilePath());
+    assertStartsWith(output, LANG_POLISH);
+    assertContains(output, "Unknown words: [This, is]");
+  }
+
+  /** Same input without "-u": no list of unknown words. */
+  public void testNoListUnknown() throws URISyntaxException, IOException, ParserConfigurationException, SAXException {
+    String output = runMain("-l", "pl", englishTestFilePath());
+    assertStartsWith(output, LANG_POLISH);
+    assertNotContains(output, "Unknown words: [This, is]");
+  }
+
+  // ---- helpers ----------------------------------------------------------
+
+  /** Runs Main with the given arguments and returns what it wrote to System.out. */
+  private String runMain(String... args) throws IOException, ParserConfigurationException, SAXException {
+    Main.main(args);
+    return capturedOut();
+  }
+
+  /** Everything written to System.out so far, decoded with the platform charset as before. */
+  private String capturedOut() {
+    return new String(this.out.toByteArray());
+  }
+
+  /** Everything written to System.err so far, decoded with the platform charset as before. */
+  private String capturedErr() {
+    return new String(this.err.toByteArray());
+  }
+
+  /** Makes the given text available on System.in (restored in tearDown()). */
+  private void feedStdIn(String text) {
+    System.setIn(new ByteArrayInputStream(text.getBytes()));
+  }
+
+  /** Resolves the English test resource to a file system path, failing clearly if it is missing. */
+  private String englishTestFilePath() throws URISyntaxException {
+    final URL url = this.getClass().getResource(ENGLISH_TEST_FILE);
+    assertNotNull("Test resource not found: " + ENGLISH_TEST_FILE, url);
+    return new URI(url.toString()).getPath();
+  }
+
+  /**
+   * Creates a temporary UTF-8 text file containing the given lines. The file
+   * is deleted on JVM exit; the writer is closed even if writing fails.
+   */
+  private File createInputFile(String... lines) throws IOException {
+    File input = File.createTempFile("input", "txt");
+    input.deleteOnExit();
+    PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8"));
+    try {
+      for (String line : lines) {
+        w.println(line);
+      }
+    } finally {
+      w.close();
+    }
+    return input;
+  }
+
+  private static void assertStartsWith(String text, String prefix) {
+    assertTrue("Expected output to start with <" + prefix + "> but was: " + text, text.startsWith(prefix));
+  }
+
+  private static void assertContains(String text, String expected) {
+    assertTrue("Expected output to contain <" + expected + "> but was: " + text, text.indexOf(expected) != -1);
+  }
+
+  private static void assertNotContains(String text, String unexpected) {
+    assertTrue("Expected output not to contain <" + unexpected + "> but was: " + text, text.indexOf(unexpected) == -1);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/TestTools.java b/JLanguageTool/src/test/de/danielnaber/languagetool/TestTools.java
new file mode 100644
index 0000000..a9a669c
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/TestTools.java
@@ -0,0 +1,233 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import junit.framework.Assert;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * @author Daniel Naber
+ */
+public final class TestTools {
+
+  private TestTools() {
+    // static helpers only, no instances
+  }
+
+  public static ResourceBundle getEnglishMessages() {
+    return getMessages("en");
+  }
+
+  /**
+   * Gets the resource bundle for the specified language.
+   * @param language lowercase two-letter ISO-639 code.
+   * @return the resource bundle for the specified language.
+   */
+  public static ResourceBundle getMessages(String language) {
+    return ResourceBundle.getBundle(
+        "de.danielnaber.languagetool.MessagesBundle", new Locale(language));
+  }
+
+  /**
+   * Concatenates the given sentences into one string and asserts that the
+   * sentence tokenizer splits that string back into exactly these sentences.
+   *
+   * @param sentences the expected sentences, in order
+   * @param stokenizer the sentence tokenizer under test
+   */
+  public static void testSplit(final String[] sentences,
+      final SentenceTokenizer stokenizer) {
+    final StringBuilder inputString = new StringBuilder();
+    final List<String> input = new ArrayList<String>();
+    for (final String sentence : sentences) {
+      input.add(sentence);
+      inputString.append(sentence);
+    }
+    Assert.assertEquals(input, stokenizer.tokenize(inputString.toString()));
+  }
+
+  /**
+   * Tokenizes and tags the input and compares the result with the expected
+   * string. Each token is rendered as its sorted readings joined by "|"
+   * (each reading as <code>token/[lemma]POSTAG</code>); tokens are joined
+   * by " -- ".
+   *
+   * @param input text to analyze
+   * @param expected expected rendering of the tagged tokens
+   * @param tokenizer word tokenizer to use
+   * @param tagger tagger under test
+   */
+  public static void myAssert(final String input, final String expected,
+      final Tokenizer tokenizer, final Tagger tagger) throws IOException {
+    // whitespace confuses tagger, so give it the tokens but no whitespace
+    // tokens:
+    final List<String> noWhitespaceTokens = wordTokens(tokenizer.tokenize(input));
+    final List<AnalyzedTokenReadings> output = tagger.tag(noWhitespaceTokens);
+    final StringBuilder outputStr = new StringBuilder();
+    String separator = "";
+    for (final AnalyzedTokenReadings token : output) {
+      outputStr.append(separator);
+      outputStr.append(formatReadings(token));
+      separator = " -- ";
+    }
+    Assert.assertEquals(expected, outputStr.toString());
+  }
+
+  /**
+   * Splits the input into sentences, tags and disambiguates each sentence and
+   * compares the rendering of all tokens with the expected string. Within a
+   * sentence the tokens are joined by a single space; nothing is inserted
+   * between sentences. Every sentence starts with a SENT_START token, and
+   * tokens without letters or digits get a null token from the tagger.
+   *
+   * @param input text to analyze
+   * @param expected expected rendering of the disambiguated tokens
+   * @param tokenizer word tokenizer to use
+   * @param sentenceTokenizer sentence tokenizer to use
+   * @param tagger tagger to use
+   * @param disambiguator disambiguator under test
+   */
+  public static void myAssert(final String input, final String expected,
+      final Tokenizer tokenizer, final SentenceTokenizer sentenceTokenizer,
+      final Tagger tagger, final Disambiguator disambiguator)
+      throws IOException {
+    final StringBuilder outputStr = new StringBuilder();
+    final List<String> sentences = sentenceTokenizer.tokenize(input);
+    for (final String sentence : sentences) {
+      final List<String> tokens = tokenizer.tokenize(sentence);
+      // whitespace confuses tagger, so give it the tokens but no whitespace
+      // tokens:
+      final List<AnalyzedTokenReadings> aTokens = tagger.tag(wordTokens(tokens));
+
+      // one slot per token plus one for the sentence start marker
+      final AnalyzedTokenReadings[] tokenArray = new AnalyzedTokenReadings[tokens.size() + 1];
+      int toArrayCount = 0;
+      final AnalyzedToken[] startTokenArray = new AnalyzedToken[] {
+          new AnalyzedToken("", "SENT_START", null) };
+      tokenArray[toArrayCount++] = new AnalyzedTokenReadings(startTokenArray, 0);
+      int startPos = 0;
+      int noWhitespaceCount = 0;
+      for (final String tokenStr : tokens) {
+        final AnalyzedTokenReadings posTag;
+        if (isWord(tokenStr)) {
+          // tagged tokens come back in input order, so consume them one by one
+          posTag = aTokens.get(noWhitespaceCount);
+          posTag.setStartPos(startPos);
+          noWhitespaceCount++;
+        } else {
+          posTag = tagger.createNullToken(tokenStr, startPos);
+        }
+        tokenArray[toArrayCount++] = posTag;
+        startPos += tokenStr.length();
+      }
+
+      // disambiguate assigned tags
+      final AnalyzedSentence finalSentence =
+          disambiguator.disambiguate(new AnalyzedSentence(tokenArray));
+      final AnalyzedTokenReadings[] output = finalSentence.getTokens();
+
+      for (int i = 0; i < output.length; i++) {
+        outputStr.append(formatReadings(output[i]));
+        if (i < output.length - 1) {
+          outputStr.append(' ');
+        }
+      }
+    }
+    Assert.assertEquals(expected, outputStr.toString());
+  }
+
+  /**
+   * @return true if the token contains at least one letter or digit
+   */
+  public static boolean isWord(final String token) {
+    for (int i = 0; i < token.length(); i++) {
+      final char c = token.charAt(i);
+      if (Character.isLetter(c) || Character.isDigit(c)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Used to call private methods for testing
+   *
+   * @param targetClass
+   *          Class under test
+   * @param methodName
+   *          Name of the method under test
+   * @param argClasses
+   *          Types of arguments
+   * @param argObjects
+   *          Values of arguments
+   * @throws InvocationTargetException if the invoked method itself throws
+   * @throws IllegalAccessException if the method cannot be accessed
+   * @throws IllegalArgumentException if the arguments do not fit the method
+   * @throws NoSuchMethodException if the class declares no such method
+   * @throws SecurityException if a security manager denies the reflective access
+   */
+  public static void callStaticMethod(final Class<?> targetClass,
+      final String methodName, final Class<?>[] argClasses,
+      final Object[] argObjects) throws InvocationTargetException,
+      IllegalArgumentException, IllegalAccessException, SecurityException,
+      NoSuchMethodException {
+
+    final Method method = targetClass.getDeclaredMethod(methodName, argClasses);
+    method.setAccessible(true);
+    method.invoke(null, argObjects);
+  }
+
+  /**
+   * Like {@link #callStaticMethod(Class, String, Class[], Object[])}, but
+   * returns the result of the call cast to a String.
+   */
+  public static String callStringStaticMethod(final Class<?> targetClass,
+      final String methodName, final Class<?>[] argClasses,
+      final Object[] argObjects) throws InvocationTargetException,
+      IllegalArgumentException, IllegalAccessException, SecurityException,
+      NoSuchMethodException {
+
+    final Method method = targetClass.getDeclaredMethod(methodName, argClasses);
+    method.setAccessible(true);
+    return (String) method.invoke(null, argObjects);
+  }
+
+  /** Returns the tokens for which {@link #isWord(String)} is true, in their original order. */
+  private static List<String> wordTokens(final List<String> tokens) {
+    final List<String> words = new ArrayList<String>();
+    for (final String token : tokens) {
+      if (isWord(token)) {
+        words.add(token);
+      }
+    }
+    return words;
+  }
+
+  /**
+   * Renders all readings of a token as <code>token/[lemma]POSTAG</code>,
+   * sorted and joined by "|".
+   */
+  private static String formatReadings(final AnalyzedTokenReadings token) {
+    final int readingsNumber = token.getReadingsLength();
+    final List<String> readings = new ArrayList<String>();
+    for (int j = 0; j < readingsNumber; j++) {
+      final StringBuilder readingStr = new StringBuilder();
+      readingStr.append(token.getAnalyzedToken(j).getToken());
+      readingStr.append("/[");
+      readingStr.append(token.getAnalyzedToken(j).getLemma());
+      readingStr.append(']');
+      readingStr.append(token.getAnalyzedToken(j).getPOSTag());
+      readings.add(readingStr.toString());
+    }
+    // force some order on the result just for the test case - order may vary
+    // from one version of the lexicon to the next:
+    Collections.sort(readings);
+    return StringTools.listToString(readings, "|");
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/TranslationTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/TranslationTest.java
new file mode 100644
index 0000000..fe4b5e2
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/TranslationTest.java
@@ -0,0 +1,117 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+
+import de.danielnaber.languagetool.tools.StringTools;
+
+import junit.framework.TestCase;
+
+/**
+ * Check if the translations seem to be complete.
+ *
+ * @author Daniel Naber
+ */
+public class TranslationTest extends TestCase {
+
+  /**
+   * Reports (on standard error) every key of the English message bundle that
+   * is missing from another language's bundle. Languages without a bundle
+   * file are skipped. Missing keys are only reported, they do not fail the test.
+   */
+  public void testTranslationKeyExistence() throws IOException {
+    // use English version as the reference:
+    final Set<Object> englishKeys = loadProperties(getTranslationFile(Language.ENGLISH)).keySet();
+    for (final Language lang : Language.LANGUAGES) {
+      if (lang == Language.ENGLISH || lang == Language.DEMO) {
+        continue;
+      }
+      final File langFile = getTranslationFile(lang);
+      if (!langFile.exists()) {
+        continue;
+      }
+      final Set<Object> langKeys = loadProperties(langFile).keySet();
+      for (final Object englishKey : englishKeys) {
+        if (!langKeys.contains(englishKey)) {
+          System.err.println("***** No key '" + englishKey + "' in file " + langFile);
+        }
+      }
+    }
+  }
+
+  /**
+   * Make sure values are not empty. Empty values are only reported on
+   * standard error, they do not fail the test.
+   */
+  public void testTranslationsAreNotEmpty() throws IOException {
+    for (final Language lang : Language.LANGUAGES) {
+      if (lang == Language.DEMO) {
+        continue;
+      }
+      final File file = getTranslationFile(lang);
+      if (!file.exists()) {
+        System.err.println("Note: no translation available for " + lang);
+        continue;
+      }
+      for (final String rawLine : loadFile(file)) {
+        final String line = rawLine.trim();
+        // skip blank lines and comments
+        if (StringTools.isEmpty(line) || line.charAt(0)=='#') {
+          continue;
+        }
+        // split() drops trailing empty strings, so "key=" yields one part only
+        final String[] parts = line.split("=");
+        if (parts.length < 2) {
+          System.err.println("***** Empty translation: '" + line + "' in file " + file);
+          //fail("Empty translation: '" + line + "' in file " + file);
+        }
+      }
+    }
+  }
+
+  /** Loads a properties file and closes the underlying stream in all cases. */
+  private Properties loadProperties(final File file) throws IOException {
+    final Properties props = new Properties();
+    final FileInputStream in = new FileInputStream(file);
+    try {
+      props.load(in);
+    } finally {
+      in.close();
+    }
+    return props;
+  }
+
+  /** Reads all lines of the file; the readers are closed in all cases. */
+  private List<String> loadFile(File file) throws IOException {
+    final List<String> lines = new ArrayList<String>();
+    FileReader fr = null;
+    BufferedReader br = null;
+    try {
+      fr = new FileReader(file);
+      br = new BufferedReader(fr);
+      String line;
+      while ((line = br.readLine()) != null) {
+        lines.add(line);
+      }
+    } finally {
+      if (br != null) {
+        br.close();
+      }
+      if (fr != null) {
+        fr.close();
+      }
+    }
+    return lines;
+  }
+
+  /** Location of the message bundle for the language, relative to the working directory. */
+  private File getTranslationFile(Language lang) {
+    return new File("src" + File.separator + "java" + File.separator
+        + "de" + File.separator + "danielnaber" + File.separator + "languagetool"
+        + File.separator + "MessagesBundle_" + lang.getShortName() + ".properties");
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/ValidateXMLTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/ValidateXMLTest.java
new file mode 100644
index 0000000..cf74a6b
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/ValidateXMLTest.java
@@ -0,0 +1,72 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+public class ValidateXMLTest extends TestCase {
+
+  /** Validates the grammar.xml of every language against rules.xsd. */
+  public void testPatternFile() throws IOException {
+    final XMLValidator validator = new XMLValidator();
+    for (final Language lang : Language.LANGUAGES) {
+      final String grammarFile = rulesPath("/" + lang.getShortName() + "/grammar.xml");
+      validator.validate(grammarFile, rulesPath("/rules.xsd"));
+    }
+  }
+
+  /** Validates false-friends.xml against its DTD, with "rules" passed as third argument. */
+  public void testFalseFriendsXML() throws IOException {
+    final XMLValidator validator = new XMLValidator();
+    validator.validate(rulesPath("/false-friends.xml"),
+        rulesPath("/false-friends.dtd"), "rules");
+  }
+
+  /** Validates disambiguation.xml against disambiguation.xsd for a fixed set of languages. */
+  public void testDisambiguationRuleFile() throws IOException {
+    final XMLValidator validator = new XMLValidator();
+    // only these languages are checked here, not all of Language.LANGUAGES
+    final Language[] languages = {
+        Language.FRENCH, Language.ENGLISH, Language.DUTCH, Language.POLISH };
+    for (final Language lang : languages) {
+      final String disambiguationFile = resourcePath("/" + lang.getShortName() + "/disambiguation.xml");
+      validator.validate(disambiguationFile, resourcePath("/disambiguation.xsd"));
+    }
+  }
+
+  /**
+   * Validate XML files, as a help for people developing rules that are not programmers.
+   */
+  public static void main(final String[] args) throws IOException {
+    final ValidateXMLTest validation = new ValidateXMLTest();
+    System.out.println("Validating XML grammar files ...");
+    validation.testPatternFile();
+    validation.testFalseFriendsXML();
+    validation.testDisambiguationRuleFile();
+    System.out.println("Validation tests successful.");
+  }
+
+  /** Prefixes the given suffix with the rules directory of the data broker. */
+  private static String rulesPath(final String suffix) {
+    return JLanguageTool.getDataBroker().getRulesDir() + suffix;
+  }
+
+  /** Prefixes the given suffix with the resource directory of the data broker. */
+  private static String resourcePath(final String suffix) {
+    return JLanguageTool.getDataBroker().getResourceDir() + suffix;
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/VersionNumberTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/VersionNumberTest.java
new file mode 100644
index 0000000..d60d387
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/VersionNumberTest.java
@@ -0,0 +1,57 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import junit.framework.TestCase;
+
+public class VersionNumberTest extends TestCase {
+
+  /**
+   * Checks that the version number in build.properties is the same as the
+   * VERSION constant in JLanguageTool.java. Both files are opened by relative
+   * path.
+   *
+   * @throws IOException if one of the two files cannot be read
+   */
+  public void testVersionNumber() throws IOException {
+    final String buildFile = load("build.properties");
+    final Pattern p1 = Pattern.compile("version = ([0-9\\.]+(-dev)?)");
+    final Matcher m1 = p1.matcher(buildFile);
+    // Without this check a missing version would surface as an
+    // IllegalStateException from group() instead of a test failure.
+    assertTrue("No version number found in build.properties", m1.find());
+    final String javaFile = load("src/java/de/danielnaber/languagetool/JLanguageTool.java");
+    final Pattern p2 = Pattern.compile("VERSION = \"(.*?)\"");
+    final Matcher m2 = p2.matcher(javaFile);
+    assertTrue("No VERSION constant found in JLanguageTool.java", m2.find());
+    assertEquals(m1.group(1), m2.group(1));
+  }
+
+  /**
+   * Reads a text file into a string, one '\n' after every line.
+   *
+   * @param filename path of the file to read
+   * @return the file content
+   * @throws IOException if the file cannot be opened or read
+   */
+  private String load(String filename) throws IOException {
+    final BufferedReader br = new BufferedReader(new FileReader(filename));
+    try {
+      final StringBuilder sb = new StringBuilder();
+      String line;
+      while ((line = br.readLine()) != null) {
+        // keep the line boundary so that a regex match cannot span two lines
+        sb.append(line).append('\n');
+      }
+      return sb.toString();
+    } finally {
+      // closing the BufferedReader also closes the wrapped FileReader
+      br.close();
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/XMLValidator.java b/JLanguageTool/src/test/de/danielnaber/languagetool/XMLValidator.java
new file mode 100644
index 0000000..eaf606e
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/XMLValidator.java
@@ -0,0 +1,158 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.net.URL;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.transform.stream.StreamSource;
+import javax.xml.validation.Schema;
+import javax.xml.validation.SchemaFactory;
+import javax.xml.validation.Validator;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Validates XML files and XML strings against a DTD or an XML Schema (XSD).
+ *
+ * @author Daniel Naber
+ */
+public final class XMLValidator {
+
+  /** Matches one complete <code>&lt;error .../&gt;</code> element, also when it spans several lines. */
+  private static final Pattern ERROR_ELEMENT =
+      Pattern.compile("(<error.*?/>)", Pattern.DOTALL|Pattern.MULTILINE);
+
+  public XMLValidator() {
+  }
+
+  /**
+   * Check some limits of our simplified XML output: no <code>&lt;error .../&gt;</code>
+   * element may contain a line break, and each one must start on a new line.
+   * An element at the very beginning of the input counts as starting on a new line.
+   *
+   * @param xml the XML output to check
+   * @throws IOException if one of the two limits is violated
+   */
+  public void checkSimpleXMLString(String xml) throws IOException {
+    final Matcher matcher = ERROR_ELEMENT.matcher(xml);
+    int pos = 0;
+    while (matcher.find(pos)) {
+      final String errorElement = matcher.group();
+      pos = matcher.end();
+      if (errorElement.contains("\n") || errorElement.contains("\r")) {
+        throw new IOException("<error ...> may not contain line breaks");
+      }
+      // There is no preceding character at offset 0; reading it would throw
+      // a StringIndexOutOfBoundsException.
+      if (matcher.start() > 0) {
+        final char beforeError = xml.charAt(matcher.start() - 1);
+        if (beforeError != '\n' && beforeError != '\r') {
+          throw new IOException("Each <error ...> must start on a new line");
+        }
+      }
+    }
+  }
+
+  /**
+   * Validate XML with the given DTD. Throws exception on error.
+   *
+   * @param xml the XML content
+   * @param dtdFile resource path of the DTD, resolved relative to this class
+   * @param docType name used in the inserted DOCTYPE declaration
+   */
+  public void validateXMLString(String xml, String dtdFile, String docType) throws SAXException, IOException, ParserConfigurationException {
+    validateInternal(xml, dtdFile, docType);
+  }
+
+  /**
+   * Validate XML file with the given DTD. Throws exception on error.
+   *
+   * @param filename resource path of the XML file, resolved relative to this class
+   * @param dtdFile resource path of the DTD, resolved relative to this class
+   * @param docType name used in the inserted DOCTYPE declaration
+   * @throws IOException on any load or validation problem; the original exception is set as cause
+   */
+  public final void validate(String filename, String dtdFile, String docType) throws IOException {
+    try {
+      // NOTE(review): whether StringTools.readFile closes the stream is not visible here - confirm
+      String xml = StringTools.readFile(this.getClass().getResourceAsStream(filename), "utf-8");
+      validateInternal(xml, dtdFile, docType);
+    } catch (Exception e) {
+      IOException ioe = new IOException("Cannot load or parse '"+filename+"'");
+      ioe.initCause(e);
+      throw ioe;
+    }
+  }
+
+  /**
+   * Validate XML file using the given XSD. Throws an exception on error
+   * @param filename File to validate.
+   * @param xmlSchema Schema to use.
+   * @throws IOException Thrown on error.
+   */
+  public final void validate(String filename, String xmlSchema) throws IOException {
+    try {
+      final InputStream xmlStream = this.getClass().getResourceAsStream(filename);
+      if (xmlStream == null) {
+        throw new IOException("Resource not found: '" + filename + "'");
+      }
+      try {
+        final URL schemaUrl = this.getClass().getResource(xmlSchema);
+        if (schemaUrl == null) {
+          throw new IOException("Schema not found: '" + xmlSchema + "'");
+        }
+        validateInternal(xmlStream, schemaUrl);
+      } finally {
+        // the stream was opened here, so it is released here in every case
+        xmlStream.close();
+      }
+    } catch (Exception e) {
+      IOException ioe = new IOException("Cannot load or parse '"+filename+"'");
+      ioe.initCause(e);
+      throw ioe;
+    }
+  }
+
+  /**
+   * Replaces any DOCTYPE of the document by one that points to the given DTD
+   * and parses the result with a validating SAX parser.
+   */
+  private void validateInternal(String xml, String dtdFile, String doctype) throws SAXException, IOException, ParserConfigurationException {
+    final SAXParserFactory factory = SAXParserFactory.newInstance();
+    factory.setValidating(true);
+    final SAXParser saxParser = factory.newSAXParser();
+    //used for removing existing DOCTYPE from grammar.xml files
+    //('.' does not match line breaks, so this removes up to the last '>' on the DOCTYPE line)
+    xml = xml.replaceAll("<!DOCTYPE.+>", "");
+    final String decl = "<?xml version=\"1.0\"";
+    final String endDecl = "?>";
+    final String dtd = "<!DOCTYPE "+doctype+" PUBLIC \"-//W3C//DTD Rules 0.1//EN\" \"" +this.getClass().getResource(dtdFile)+ "\">";
+    final int pos = xml.indexOf(decl);
+    if (pos == -1) {
+      throw new IOException("No XML declaration found in '" + xml.substring(0, Math.min(100, xml.length())) + "...'");
+    }
+    // search the end of the declaration only behind its start
+    final int endPos = xml.indexOf(endDecl, pos);
+    if (endPos == -1) {
+      throw new IOException("XML declaration is not terminated by '?>' in '" + xml.substring(0, Math.min(100, xml.length())) + "...'");
+    }
+    final int insertAt = endPos + endDecl.length();
+    final String newXML = xml.substring(0, insertAt) + "\r\n" + dtd + xml.substring(insertAt);
+    //System.err.println(newXML);
+    final InputSource is = new InputSource(new StringReader(newXML));
+    saxParser.parse(is, new ErrorHandler());
+  }
+
+  /**
+   * Validates the stream against the XML Schema at the given URL. The stream is
+   * not closed here; that is left to the caller.
+   */
+  private void validateInternal(InputStream xml, URL xmlSchema) throws SAXException, IOException, ParserConfigurationException {
+    final SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
+    final Schema schema = sf.newSchema(xmlSchema);
+    final Validator validator = schema.newValidator();
+    validator.setErrorHandler(new ErrorHandler());
+    validator.validate(new StreamSource(xml));
+  }
+
+}
+
+/**
+ * XML handler that prints every warning and error to stderr and then rethrows it.
+ * All other callbacks keep the behavior inherited from DefaultHandler.
+ */
+class ErrorHandler extends DefaultHandler {
+
+  public void warning (SAXParseException e) throws SAXException {
+    reportAndRethrow(e);
+  }
+
+  public void error (SAXParseException e) throws SAXException {
+    reportAndRethrow(e);
+  }
+
+  /** Prints message, line and column of the problem to stderr, then throws it unchanged. */
+  private static void reportAndRethrow(SAXParseException problem) throws SAXException {
+    final StringBuilder report = new StringBuilder();
+    report.append(problem.getMessage());
+    report.append(" Problem found at line ").append(problem.getLineNumber());
+    report.append(", column ").append(problem.getColumnNumber()).append(".");
+    System.err.println(report.toString());
+    throw problem;
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/TabBitextReaderTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/TabBitextReaderTest.java
new file mode 100644
index 0000000..0c66989
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/TabBitextReaderTest.java
@@ -0,0 +1,59 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.bitext;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+
+import junit.framework.TestCase;
+
+public class TabBitextReaderTest extends TestCase {
+
+  /**
+   * Writes a small tab-separated UTF-8 file (source, tab, target per line)
+   * and checks the pairs that TabBitextReader returns for it.
+   *
+   * @throws Exception if the temporary file cannot be written or read
+   */
+  public void testReader() throws Exception {
+    // Create a simple plain text file.
+    final File input = File.createTempFile("input", ".txt");
+    input.deleteOnExit();
+
+    // Populate the file with data; close the writer even if writing fails.
+    final PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8"));
+    try {
+      w.println("This is not actual.\tTo nie jest aktualne.");
+      w.println("Test\tTest");
+      w.println("ab\tVery strange data indeed, much longer than input");
+    } finally {
+      w.close();
+    }
+
+    final TabBitextReader reader = new TabBitextReader(input.getAbsolutePath(), "UTF-8");
+    int i = 1;
+    for (StringPair srcAndTrg : reader) {
+      assertNotNull(srcAndTrg.getSource());
+      assertNotNull(srcAndTrg.getTarget());
+      if (i == 1) {
+        assertEquals("This is not actual.", srcAndTrg.getSource());
+      } else if (i == 2) {
+        assertEquals("Test", srcAndTrg.getSource());
+      } else if (i == 3) {
+        assertEquals("Very strange data indeed, much longer than input",
+            srcAndTrg.getTarget());
+      }
+      i++;
+    }
+    // Without this check the test would pass if the reader returned nothing at all.
+    assertTrue("Reader returned no sentence pairs", i > 1);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/WordFastTMReaderTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/WordFastTMReaderTest.java
new file mode 100644
index 0000000..b044fc0
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/WordFastTMReaderTest.java
@@ -0,0 +1,56 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.bitext;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+
+import junit.framework.TestCase;
+
+public class WordFastTMReaderTest extends TestCase {
+
+  /**
+   * Writes a small WordFast translation memory as a UTF-8 text file and checks
+   * the source segments that WordFastTMReader returns for it.
+   *
+   * @throws Exception if the temporary file cannot be written or read
+   */
+  public void testReader() throws Exception {
+    // Create a simple WordFast text memory.
+    final File input = File.createTempFile("input", ".txt");
+    input.deleteOnExit();
+
+    // Populate the file with data; close the writer even if writing fails.
+    final PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8"));
+    try {
+      w.println("%20100801~111517\t%UserID,AHLJat,AHLJat\t%TU=00008580\t%EN-US\t%Wordfast TM v.546/00\t%PL-PL\t%\t.");
+      w.println("20100727~145333\tAHLJat\t2\tEN-US\tObjection:\tPL-PL\tZarzut: ");
+      w.println("20100727~051350\tAHLJat\t2\tEN-US\tWhy not?&tA;\tPL-PL\tDlaczego nie?&tA; ");
+    } finally {
+      w.close();
+    }
+
+    final WordFastTMReader reader = new WordFastTMReader(input.getAbsolutePath(), "UTF-8");
+    int i = 1;
+    for (StringPair srcAndTrg : reader) {
+      assertNotNull(srcAndTrg.getSource());
+      assertNotNull(srcAndTrg.getTarget());
+      if (i == 1) {
+        assertEquals("Objection:", srcAndTrg.getSource());
+      } else if (i == 2) {
+        assertEquals("Why not?&tA;", srcAndTrg.getSource());
+      }
+      i++;
+    }
+    // Without this check the test would pass if the reader returned nothing at all.
+    assertTrue("Reader returned no sentence pairs", i > 1);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/openoffice/MainTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/openoffice/MainTest.java
new file mode 100644
index 0000000..863e8ac
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/openoffice/MainTest.java
@@ -0,0 +1,37 @@
+package de.danielnaber.languagetool.openoffice;
+
+import junit.framework.TestCase;
+
+import com.sun.star.lang.Locale;
+import com.sun.star.linguistic2.ProofreadingResult;
+import com.sun.star.beans.PropertyValue;
+
+public class MainTest extends TestCase {
+
+  /**
+   * Calls Main.doProofreading on a Polish text from every start offset and
+   * checks the document identifier and the reported sentence positions, then
+   * repeats the check from offset 0 for two fixed texts.
+   */
+  public void testDoProofreading() {
+    final Main prog = new Main(null);
+    final String testString = "To jest trudne zdanie. A to następne. A to przedostatnie jest.\u0002 Test ostatniego.";
+    final Locale plLoc = new Locale("pl", "PL", "");
+    final PropertyValue[] prop = new PropertyValue[0];
+    final int textLength = testString.length();
+    final int firstSentenceLength = "To jest trudne zdanie. ".length();
+    for (int start = 0; start <= textLength; start++) {
+      final ProofreadingResult result = prog.doProofreading("1", testString, plLoc, start, textLength, prop);
+      assertEquals("1", result.aDocumentIdentifier);
+      assertTrue(result.nStartOfNextSentencePosition >= start);
+      // offsets inside the first sentence must report exactly that sentence
+      if (start < firstSentenceLength) {
+        assertEquals(firstSentenceLength, result.nStartOfNextSentencePosition);
+        assertEquals(0, result.nStartOfSentencePosition);
+      }
+    }
+    ProofreadingResult paRes = prog.doProofreading("1", testString, plLoc, 0, textLength, prop);
+    assertEquals("1", paRes.aDocumentIdentifier);
+    assertEquals(23, paRes.nStartOfNextSentencePosition);
+    assertEquals(0, paRes.nStartOfSentencePosition);
+    //that was causing NPE but not anymore:
+    final String testString2 = "To jest „nowy problem”. A to inny jeszcze( „problem. Co jest „?";
+    paRes = prog.doProofreading("1", testString2, plLoc, 0, testString2.length(), prop);
+    assertEquals("1", paRes.aDocumentIdentifier);
+    assertEquals(24, paRes.nStartOfNextSentencePosition);
+    assertEquals(0, paRes.nStartOfSentencePosition);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CommaWhitespaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CommaWhitespaceRuleTest.java
new file mode 100644
index 0000000..6a069ff
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CommaWhitespaceRuleTest.java
@@ -0,0 +1,102 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+
+/**
+ * @author Daniel Naber
+ */
+public class CommaWhitespaceRuleTest extends TestCase {
+
+  /**
+   * Runs CommaWhitespaceRule on English-analyzed sentences and checks the
+   * number of matches and, for one sentence, the match positions.
+   */
+  public void testRule() throws IOException {
+    final CommaWhitespaceRule rule = new CommaWhitespaceRule(TestTools.getEnglishMessages());
+    final JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+
+    // correct sentences:
+    assertEquals(0, matchesOf(rule, langTool, "This is a test sentence.").length);
+    assertEquals(0, matchesOf(rule, langTool, "This, is, a test sentence.").length);
+    assertEquals(0, matchesOf(rule, langTool, "This (foo bar) is a test(!).").length);
+    //we get only entities into the comma rule, so let's test for entities:
+    assertEquals(0, matchesOf(rule, langTool, "&quot;This is it,&quot; he said.").length);
+    assertEquals(0, matchesOf(rule, langTool, "Das kostet €2,45.").length);
+    assertEquals(0, matchesOf(rule, langTool, "Das kostet 50,- Euro").length);
+    //test OpenOffice field codes:
+    assertEquals(0, matchesOf(rule, langTool, "In his book,\u0002 Einstein proved this to be true.").length);
+
+    //test thousand separators:
+    assertEquals(0, matchesOf(rule, langTool, "This is $1,000,000.").length);
+    //test numbers:
+    assertEquals(0, matchesOf(rule, langTool, "This is 1,5.").length);
+
+    //test two consecutive commas:
+    assertEquals(0, matchesOf(rule, langTool, "This is a ,,test''.").length);
+
+    // errors:
+    assertEquals(1, matchesOf(rule, langTool, "This,is a test sentence.").length);
+    assertEquals(1, matchesOf(rule, langTool, "This , is a test sentence.").length);
+    assertEquals(2, matchesOf(rule, langTool, "This ,is a test sentence.").length);
+    assertEquals(2, matchesOf(rule, langTool, ",is a test sentence.").length);
+    assertEquals(1, matchesOf(rule, langTool, "This ( foo bar) is a test(!).").length);
+    assertEquals(1, matchesOf(rule, langTool, "This (foo bar ) is a test(!).").length);
+
+    //other brackets, first [
+    assertEquals(1, matchesOf(rule, langTool, "This [ foo bar) is a test(!).").length);
+    assertEquals(1, matchesOf(rule, langTool, "This (foo bar ] is a test(!).").length);
+    //now {
+    assertEquals(1, matchesOf(rule, langTool, "This { foo bar) is a test(!).").length);
+    assertEquals(1, matchesOf(rule, langTool, "This (foo bar } is a test(!).").length);
+
+    //full stop error:
+    assertEquals(1, matchesOf(rule, langTool, "This is a sentence with an orphaned full stop .").length);
+    //full stop exception cases:
+    assertEquals(0, matchesOf(rule, langTool, "This is a sentence with ellipsis ...").length);
+    assertEquals(0, matchesOf(rule, langTool, "This is a figure: .5 and it's correct.").length);
+
+    // check match positions:
+    final RuleMatch[] positioned = matchesOf(rule, langTool, "ABB ( z.B. )");
+    assertEquals(2, positioned.length);
+    assertEquals(4, positioned[0].getFromPos());
+    assertEquals(6, positioned[0].getToPos());
+    assertEquals(11, positioned[1].getFromPos());
+    assertEquals(13, positioned[1].getToPos());
+    assertEquals(0, matchesOf(rule, langTool, "This is a test with a OOo footnote\u0002, which is denoted by 0x2 in the text.").length);
+  }
+
+  /** Analyzes the sentence with the given tool and returns the matches the rule reports for it. */
+  private static RuleMatch[] matchesOf(CommaWhitespaceRule rule, JLanguageTool langTool, String sentence) throws IOException {
+    return rule.match(langTool.getAnalyzedSentence(sentence));
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CompoundRuleTestAbs.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CompoundRuleTestAbs.java
new file mode 100644
index 0000000..92b50db
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CompoundRuleTestAbs.java
@@ -0,0 +1,78 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+
+/**
+ * Abstract test case for CompoundRule. <br/>
+ * Based on an original version for [en] and [pl].
+ *
+ * @author Daniel Naber
+ *
+ */
+public abstract class CompoundRuleTestAbs extends TestCase {
+
+  // the object used for checking text against different rules
+  protected JLanguageTool langTool;
+  // the rule that checks that compounds (if in the list) are not written as separate words. Language specific.
+  protected AbstractCompoundRule rule;
+
+  protected void setUp() throws Exception {
+    super.setUp();
+    // concrete classes will initialize langTool and rule variables here.
+  }
+
+  /**
+   * Check the text against the compound rule without checking suggestions.
+   * @param expectedErrors the number of expected errors.
+   * @param text the text to check.
+   * @throws IOException thrown by JLanguageTool.
+   */
+  public void check(int expectedErrors, String text) throws IOException {
+    check(expectedErrors, text, null);
+  }
+
+  /**
+   * Check the text against the compound rule.
+   * @param expectedErrors the number of expected errors.
+   * @param text the text to check.
+   * @param expSuggestions the expected suggestions in order, or null to skip
+   *        the suggestion check; may only be given if exactly one error is expected.
+   * @throws IOException thrown by JLanguageTool.
+   */
+  public void check(int expectedErrors, String text, String[] expSuggestions) throws IOException {
+    assertNotNull("Please initialize langTool!", langTool);
+    assertNotNull("Please initialize 'rule'!", rule);
+    final RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(text));
+    assertEquals(expectedErrors, found.length);
+    if (expSuggestions == null) {
+      // nothing more to compare
+      return;
+    }
+    if (expectedErrors != 1) {
+      throw new RuntimeException("Sorry, test case can only check suggestion if there's one rule match");
+    }
+    final RuleMatch onlyMatch = found[0];
+    final String sizeMessage = String.format("Got these suggestions: %s, expected %d ",
+        onlyMatch.getSuggestedReplacements(), expSuggestions.length);
+    assertEquals(sizeMessage, expSuggestions.length, onlyMatch.getSuggestedReplacements().size());
+    // compare the suggestions one by one, in the order the rule returns them
+    int index = 0;
+    for (final Object replacement : onlyMatch.getSuggestedReplacements()) {
+      final String actual = (String) replacement;
+      assertEquals(expSuggestions[index], actual);
+      index++;
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/DoublePunctuationRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/DoublePunctuationRuleTest.java
new file mode 100644
index 0000000..fc08de0
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/DoublePunctuationRuleTest.java
@@ -0,0 +1,55 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+
+/**
+ * @author Daniel Naber
+ */
+public class DoublePunctuationRuleTest extends TestCase {
+
+  /**
+   * Runs DoublePunctuationRule on English-analyzed sentences and checks the
+   * number of matches for each of them.
+   */
+  public void testRule() throws IOException {
+    final DoublePunctuationRule rule = new DoublePunctuationRule(TestTools.getEnglishMessages());
+    final JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+
+    // correct sentences:
+    assertEquals(0, rule.match(langTool.getAnalyzedSentence("This is a test sentence...")).length);
+    assertEquals(0, rule.match(langTool.getAnalyzedSentence("This is a test sentence... More stuff....")).length);
+    assertEquals(0, rule.match(langTool.getAnalyzedSentence("This is a test sentence..... More stuff....")).length);
+    assertEquals(0, rule.match(langTool.getAnalyzedSentence("This, is, a test sentence.")).length);
+
+    // errors:
+    assertEquals(1, rule.match(langTool.getAnalyzedSentence("This,, is a test sentence.")).length);
+    assertEquals(1, rule.match(langTool.getAnalyzedSentence("This is a test sentence.. Another sentence")).length);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRuleTest.java
new file mode 100644
index 0000000..46da891
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRuleTest.java
@@ -0,0 +1,159 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2008 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+
+public class GenericUnpairedBracketsRuleTest extends TestCase {
+
+  // NOTE(review): every test reuses one rule instance for all its sentences and keeps
+  // the sentence order unchanged, in case the rule keeps state between calls - confirm.
+
+  public void testRuleGerman() throws IOException {
+    final GenericUnpairedBracketsRule rule =
+        new GenericUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.GERMAN);
+    final JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "(Das sind die Sätze, die die testen sollen)."));
+    // incorrect sentences:
+    assertEquals(1, countMatches(rule, langTool, "Die „Sätze zum testen."));
+  }
+
+  public void testRuleSpanish() throws IOException {
+    final GenericUnpairedBracketsRule rule =
+        new GenericUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.SPANISH);
+    final JLanguageTool langTool = new JLanguageTool(Language.SPANISH);
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "Soy un hombre (muy honrado)."));
+    // incorrect sentences:
+    assertEquals(1, countMatches(rule, langTool, "De dónde vas?"));
+    assertEquals(1, countMatches(rule, langTool, "¡Atención"));
+  }
+
+  public void testRuleFrench() throws IOException {
+    final GenericUnpairedBracketsRule rule =
+        new GenericUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.FRENCH);
+    final JLanguageTool langTool = new JLanguageTool(Language.FRENCH);
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "(Qu'est ce que c'est ?)"));
+    // incorrect sentences:
+    assertEquals(1, countMatches(rule, langTool, "(Qu'est ce que c'est ?"));
+  }
+
+  public void testRuleDutch() throws IOException {
+    final GenericUnpairedBracketsRule rule =
+        new GenericUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.DUTCH);
+    final JLanguageTool langTool = new JLanguageTool(Language.DUTCH);
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool,
+        "Het centrale probleem van het werk is de ‘dichterlijke kuischheid’."));
+    // incorrect sentences:
+    assertEquals(1, countMatches(rule, langTool,
+        "Het centrale probleem van het werk is de „dichterlijke kuischheid."));
+  }
+
+  public void testRuleRomanian() throws IOException {
+    final GenericUnpairedBracketsRule rule =
+        new GenericUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.ROMANIAN);
+    final JLanguageTool langTool = new JLanguageTool(Language.ROMANIAN);
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "A fost plecat (pentru puțin timp)."));
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "Nu's de prin locurile astea."));
+    // cross-bracket matching
+    // incorrect sentences:
+    assertEquals(2, countMatches(rule, langTool, "A fost )plecat( pentru (puțin timp)."));
+    // cross-bracket matching
+    // incorrect sentences:
+    assertEquals(4, countMatches(rule, langTool, "A fost {plecat) pentru (puțin timp}."));
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "A fost plecat pentru „puțin timp”."));
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "A fost plecat „pentru... puțin timp”."));
+    // correct sentences:
+    assertEquals(0, countMatches(rule, langTool, "A fost plecat „pentru... «puțin» timp”."));
+    // correct sentences ( " is _not_ a Romanian symbol - just
+    // ignore it, the correct form is [„] (start quote) and [”] (end quote)
+    assertEquals(0, countMatches(rule, langTool, "A fost plecat \"pentru puțin timp."));
+    // incorrect sentences:
+    assertEquals(1, countMatches(rule, langTool, "A fost plecat „pentru... puțin timp."));
+    // incorrect sentences:
+    assertEquals(1, countMatches(rule, langTool, "A fost plecat «puțin."));
+    // incorrect sentences:
+    assertEquals(3, countMatches(rule, langTool, "A fost plecat „pentru «puțin timp”."));
+    // incorrect sentences:
+    assertEquals(3, countMatches(rule, langTool, "A fost plecat „pentru puțin» timp”."));
+    // incorrect sentences:
+    assertEquals(3, countMatches(rule, langTool, "A fost plecat „pentru... puțin» timp”."));
+    // cross-bracket matching
+    // incorrect sentences:
+    assertEquals(4, countMatches(rule, langTool, "A fost plecat „pentru... «puțin” timp»."));
+  }
+
+  /** Analyzes the sentence with the given tool and returns how many matches the rule reports for it. */
+  private static int countMatches(GenericUnpairedBracketsRule rule, JLanguageTool langTool, String sentence) throws IOException {
+    final RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(sentence));
+    return matches.length;
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/UppercaseSentenceStartRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/UppercaseSentenceStartRuleTest.java
new file mode 100644
index 0000000..ce42345
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/UppercaseSentenceStartRuleTest.java
@@ -0,0 +1,98 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+import java.util.List;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * @author Daniel Naber
+ */
+public class UppercaseSentenceStartRuleTest extends TestCase {
+  // Each case passes a text to JLanguageTool.check() and asserts how many matches come back.
+  public void testRule() throws IOException {
+    List<RuleMatch> found;
+    JLanguageTool german = new JLanguageTool(Language.GERMAN);
+    // German texts for which no match at all is expected:
+    found = german.check("Dies ist ein Satz. Und hier kommt noch einer");
+    assertEquals(0, found.size());
+    found = german.check("Dies ist ein Satz. Ätsch, noch einer mit Umlaut.");
+    assertEquals(0, found.size());
+    found = german.check("Dieser Satz ist bspw. okay so.");
+    assertEquals(0, found.size());
+    found = german.check("Dieser Satz ist z.B. okay so.");
+    assertEquals(0, found.size());
+    found = german.check("Dies ist ein Satz. \"Aber der hier auch!\".");
+    assertEquals(0, found.size());
+    found = german.check("\"Dies ist ein Satz!\"");
+    assertEquals(0, found.size());
+    found = german.check("'Dies ist ein Satz!'");
+    assertEquals(0, found.size());
+    // lower-case continuation after a comma followed by a line break:
+    found = german.check("Sehr geehrte Frau Merkel,\nwie wir Ihnen schon früher mitgeteilt haben...");
+    assertEquals(0, found.size());
+    // German texts for which exactly one match is expected:
+    found = german.check("Dies ist ein Satz. und hier kommt noch einer");
+    assertEquals(1, found.size());
+    found = german.check("Dies ist ein Satz. ätsch, noch einer mit Umlaut.");
+    assertEquals(1, found.size());
+    found = german.check("Dies ist ein Satz. \"aber der hier auch!\"");
+    assertEquals(1, found.size());
+    found = german.check("\"dies ist ein Satz!\"");
+    assertEquals(1, found.size());
+    found = german.check("'dies ist ein Satz!'");
+    assertEquals(1, found.size());
+    // English text with the abbreviation "Nov." in the middle of the sentence:
+    JLanguageTool english = new JLanguageTool(Language.ENGLISH);
+    found = english.check("In Nov. next year.");
+    assertEquals(0, found.size());
+  }
+
+  public void testDutchSpecialCases() throws IOException {
+    List<RuleMatch> found;
+    JLanguageTool dutch = new JLanguageTool(Language.DUTCH);
+    // accepted sentence starts, including the leading "'s":
+    found = dutch.check("A sentence.");
+    assertEquals(0, found.size());
+    found = dutch.check("'s Morgens...");
+    assertEquals(0, found.size());
+    // sentence starts expected to yield exactly one match:
+    found = dutch.check("a sentence.");
+    assertEquals(1, found.size());
+    found = dutch.check("'s morgens...");
+    assertEquals(1, found.size());
+    found = dutch.check("s sentence.");
+    assertEquals(1, found.size());
+  }
+
+  public void testPolishSpecialCases() throws IOException {
+    List<RuleMatch> found;
+    JLanguageTool polish = new JLanguageTool(Language.POLISH);
+    // a plain sentence and a list whose items start in lower case:
+    found = polish.check("Zdanie.");
+    assertEquals(0, found.size());
+    found = polish.check("To jest lista punktowana:\n\npunkt pierwszy,\n\npunkt drugi,\n\npunkt trzeci.");
+    assertEquals(0, found.size());
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WhitespaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WhitespaceRuleTest.java
new file mode 100644
index 0000000..152dd07
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WhitespaceRuleTest.java
@@ -0,0 +1,75 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+
+/**
+ * Checks that {@link WhitespaceRule} reports runs of repeated spaces and the positions they cover.
+ * @author Marcin Milkowski
+ */
+public class WhitespaceRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    WhitespaceRule rule = new WhitespaceRule(TestTools.getEnglishMessages(), Language.ENGLISH);
+    RuleMatch[] matches;
+    JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+
+    // correct sentences:
+    matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence."));
+    assertEquals(0, matches.length);
+    matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence..."));
+    assertEquals(0, matches.length);
+    matches = rule.match(langTool.getAnalyzedSentence("\n\tThis is a test sentence..."));
+    assertEquals(0, matches.length);
+
+    // incorrect sentences (the asserted positions span each run of repeated spaces):
+    matches = rule.match(langTool.getAnalyzedSentence("This  is a test sentence."));
+    assertEquals(1, matches.length);
+    assertEquals(4, matches[0].getFromPos());
+    assertEquals(6, matches[0].getToPos());
+    matches = rule.match(langTool.getAnalyzedSentence("This is a test   sentence."));
+    assertEquals(1, matches.length);
+    assertEquals(14, matches[0].getFromPos());
+    assertEquals(17, matches[0].getToPos());
+    matches = rule.match(langTool.getAnalyzedSentence("This is   a  test   sentence."));
+    assertEquals(3, matches.length);
+    assertEquals(7, matches[0].getFromPos());
+    assertEquals(10, matches[0].getToPos());
+    assertEquals(11, matches[1].getFromPos());
+    assertEquals(13, matches[1].getToPos());
+    assertEquals(17, matches[2].getFromPos());
+    assertEquals(20, matches[2].getToPos());
+    matches = rule.match(langTool.getAnalyzedSentence("\t\t\t \t\t\t\t ")); // NOTE(review): spaces in this literal may have been collapsed - confirm
+    assertEquals(1, matches.length);
+    langTool = new JLanguageTool(Language.POLISH);
+
+    // correct sentences:
+    matches = rule.match(langTool.getAnalyzedSentence("To jest test."));
+    assertEquals(0, matches.length);
+    matches = rule.match(langTool.getAnalyzedSentence("To jest  test.")); // incorrect: doubled space
+    assertEquals(1, matches.length);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WordRepeatRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WordRepeatRuleTest.java
new file mode 100644
index 0000000..01f9007
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WordRepeatRuleTest.java
@@ -0,0 +1,81 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.de.GermanWordRepeatRule;
+
+/**
+ * Runs {@link WordRepeatRule} and {@link GermanWordRepeatRule} on sentences with and without doubled words.
+ * @author Daniel Naber
+ */
+public class WordRepeatRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    RuleMatch[] hits;
+    WordRepeatRule repeatRule = new WordRepeatRule(TestTools.getEnglishMessages(), Language.ENGLISH);
+    JLanguageTool english = new JLanguageTool(Language.ENGLISH);
+    // no doubled word, so no match is expected:
+    hits = repeatRule.match(english.getAnalyzedSentence("This is a test sentence."));
+    assertEquals(0, hits.length);
+    hits = repeatRule.match(english.getAnalyzedSentence("This is a test sentence..."));
+    assertEquals(0, hits.length);
+    // one match is expected per doubled word:
+    hits = repeatRule.match(english.getAnalyzedSentence("This this is a test sentence."));
+    assertEquals(1, hits.length);
+    hits = repeatRule.match(english.getAnalyzedSentence("This is a test sentence sentence."));
+    assertEquals(1, hits.length);
+    hits = repeatRule.match(english.getAnalyzedSentence("This is is a a test sentence sentence."));
+    assertEquals(3, hits.length);
+  }
+
+  public void testRuleGerman() throws IOException {
+    RuleMatch[] hits;
+    WordRepeatRule repeatRule = new GermanWordRepeatRule(TestTools.getEnglishMessages(), Language.GERMAN);
+    JLanguageTool german = new JLanguageTool(Language.GERMAN);
+    // "die die" directly after a comma must not be reported:
+    hits = repeatRule.match(german.getAnalyzedSentence("Das sind die Sätze, die die testen sollen."));
+    assertEquals(0, hits.length);
+    hits = repeatRule.match(german.getAnalyzedSentence("Sätze, die die testen."));
+    assertEquals(0, hits.length);
+    // "die die" without a preceding comma is reported once:
+    hits = repeatRule.match(german.getAnalyzedSentence("Die die Sätze zum testen."));
+    assertEquals(1, hits.length);
+    hits = repeatRule.match(german.getAnalyzedSentence("Und die die Sätze zum testen."));
+    assertEquals(1, hits.length);
+  }
+
+  public void testRulePolish() throws IOException {
+    RuleMatch[] hits;
+    WordRepeatRule repeatRule = new WordRepeatRule(TestTools.getEnglishMessages(), Language.POLISH);
+    JLanguageTool polish = new JLanguageTool(Language.POLISH);
+    // no doubled word:
+    hits = repeatRule.match(polish.getAnalyzedSentence("To jest zdanie."));
+    assertEquals(0, hits.length);
+    // "jest jest" is reported once:
+    hits = repeatRule.match(polish.getAnalyzedSentence("To jest jest zdanie."));
+    assertEquals(1, hits.length);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/BitextPatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/BitextPatternRuleTest.java
new file mode 100644
index 0000000..01e4f6a
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/BitextPatternRuleTest.java
@@ -0,0 +1,288 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Set;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.bitext.StringPair;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRule;
+import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRuleLoader;
+import junit.framework.TestCase;
+
+public class BitextPatternRuleTest extends TestCase {
+
+  public void testBitextRulesFromXML() throws IOException {
+    testBitextRulesFromXML(null, false);
+  }
+
+  /** Runs the bitext.xml examples of every language not in ignoredLanguages; languages without that file are skipped. */
+  private void testBitextRulesFromXML(final Set<Language> ignoredLanguages,
+      final boolean verbose) throws IOException {
+    for (final Language lang : Language.LANGUAGES) {
+      if (ignoredLanguages != null && ignoredLanguages.contains(lang)) {
+        if (verbose) {
+          System.out.println("Ignoring tests for " + lang.getName());
+        }
+        continue;
+      }
+      final BitextPatternRuleLoader ruleLoader = new BitextPatternRuleLoader();
+      final String name = "/" + lang.getShortName() + "/bitext.xml";
+      final InputStream is = JLanguageTool.getDataBroker().getFromRulesDirAsStream(name);
+      if (is != null) {
+        if (verbose) {
+          System.out.println("Running tests for " + lang.getName() + "...");
+        }
+        final JLanguageTool languageTool = new JLanguageTool(lang);
+        final List<BitextPatternRule> rules = ruleLoader.getRules(is, name);
+        testBitextRulesFromXML(rules, languageTool, lang); // label failures with the language under test
+      }
+    }
+  }
+
+  /** Checks the examples of every given rule; lang is only used to label failure messages. */
+  private void testBitextRulesFromXML(final List<BitextPatternRule> rules,
+      final JLanguageTool languageTool, final Language lang) throws IOException {
+    final HashMap<String, PatternRule> complexRules = new HashMap<String, PatternRule>();
+    for (final BitextPatternRule rule : rules) {
+      testBitextRule(rule, lang, languageTool);
+    }
+    /*
+    if (!complexRules.isEmpty()) {
+      final Set<String> set = complexRules.keySet();
+      final List<PatternRule> badRules = new ArrayList<PatternRule>();
+      final Iterator<String> iter = set.iterator();
+      while (iter.hasNext()) {
+        final PatternRule badRule = complexRules.get(iter.next());
+        if (badRule != null) {
+          badRule.notComplexPhrase();
+          badRule
+              .setMessage("The rule contains a phrase that never matched any incorrect example.");
+          badRules.add(badRule);
+        }
+      }
+      if (!badRules.isEmpty()) {
+        testGrammarRulesFromXML(badRules, languageTool, lang);
+      }
+    }
+    */
+  }
+
+  /** Removes line breaks, tabs and XML markup from an example sentence. */
+  private String cleanSentence(String str) {
+    return cleanXML(str.replaceAll("[\\n\\t]+", ""));
+  }
+
+  /** Fails when a bad example lacks the opening or the closing marker tag. */
+  private void testMarker(int expectedMatchStart,
+      int expectedMatchEnd, Rule rule, Language lang) {
+    // callers subtract "<marker>".length() from the end index, so a missing tag is any negative value, not just -1
+    if (expectedMatchStart < 0 || expectedMatchEnd < 0) {
+      fail(lang
+          + ": No error position markup ('<marker>...</marker>') in bad example in rule "
+          + rule);
+    }
+  }
+
+  /** Asserts that rule reports exactly one match at the marked position of the bad sentence, with the expected suggestions. */
+  private void testBadSentence(final String origBadSentence,
+      final List<String> suggestedCorrection, final int expectedMatchStart,
+      final int expectedMatchEnd, final PatternRule rule,
+      final Language lang,
+      final JLanguageTool languageTool) throws IOException {
+    final String badSentence = cleanXML(origBadSentence);
+    assertTrue(badSentence.trim().length() > 0);
+    RuleMatch[] matches = getMatches(rule, badSentence, languageTool);
+    // exactly one match is required, at the position marked in the example:
+    assertTrue(lang + ": Did expect one error in: \"" + badSentence
+        + "\" (Rule: " + rule + "), got " + matches.length
+        + ". Additional info:" + rule.getMessage(), matches.length == 1);
+    assertEquals(lang
+        + ": Incorrect match position markup (start) for rule " + rule,
+        expectedMatchStart, matches[0].getFromPos());
+    assertEquals(lang
+        + ": Incorrect match position markup (end) for rule " + rule,
+        expectedMatchEnd, matches[0].getToPos());
+    // make sure suggestion is what we expect it to be
+    if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+      assertTrue("You specified a correction but your message has no suggestions in rule " + rule,
+          rule.getMessage().contains("<suggestion>")
+      );
+      assertTrue(lang + ": Incorrect suggestions: "
+          + suggestedCorrection.toString() + " != "
+          + matches[0].getSuggestedReplacements() + " for rule " + rule,
+          suggestedCorrection.equals(matches[0]
+              .getSuggestedReplacements()));
+      // NOTE(review): the check below sits inside this block, so it only runs when corrections are declared - confirm intended
+      // make sure the suggested correction doesn't produce an error:
+      if (matches[0].getSuggestedReplacements().size() > 0) {
+        final int fromPos = matches[0].getFromPos();
+        final int toPos = matches[0].getToPos();
+        for (final String repl : matches[0].getSuggestedReplacements()) {
+          final String fixedSentence = badSentence.substring(0, fromPos)
+              + repl + badSentence.substring(toPos);
+          matches = getMatches(rule, fixedSentence, languageTool);
+          if (matches.length > 0) {
+            fail("Incorrect input:\n"
+                + " " + badSentence
+                + "\nCorrected sentence:\n"
+                + " " + fixedSentence
+                + "\nBy Rule:\n"
+                + " " + rule
+                + "\nThe correction triggered an error itself:\n"
+                + " " + matches[0] + "\n");
+          }
+        }
+      }
+    }
+  }
+
+  /** Checks that rule accepts its correct example pairs and that its source and target patterns flag the incorrect ones. */
+  private void testBitextRule(final BitextPatternRule rule, final Language lang,
+      final JLanguageTool languageTool) throws IOException {
+    JLanguageTool srcTool = new JLanguageTool(rule.getSourceLang());
+    //int noSuggestionCount = 0;
+    final List<StringPair> goodSentences = rule.getCorrectBitextExamples();
+    for (StringPair goodSentence : goodSentences) {
+      assertTrue(cleanSentence(goodSentence.getSource()).trim().length() > 0);
+      assertTrue(cleanSentence(goodSentence.getTarget()).trim().length() > 0);
+      assertFalse(lang + ": Did not expect error in: " + goodSentence
+          + " (Rule: " + rule + ")",
+          match(rule, goodSentence.getSource(), goodSentence.getTarget(),
+              srcTool, languageTool));
+    }
+    final List<IncorrectBitextExample> badSentences = rule.getIncorrectBitextExamples();
+    for (IncorrectBitextExample origBadExample : badSentences) {
+      // strip line breaks and tabs so that examples may be indented in the XML file
+      String origBadSrcSentence = origBadExample.getExample().getSource().replaceAll(
+          "[\\n\\t]+", "");
+      String origBadTrgSentence = origBadExample.getExample().getTarget().replaceAll(
+          "[\\n\\t]+", "");
+      final List<String> suggestedCorrection = origBadExample
+          .getCorrections();
+      final int expectedSrcMatchStart = origBadSrcSentence.indexOf("<marker>");
+      final int expectedSrcMatchEnd = origBadSrcSentence.indexOf("</marker>")
+          - "<marker>".length();
+      testMarker(expectedSrcMatchStart, expectedSrcMatchEnd, rule, lang);
+      final int expectedTrgMatchStart = origBadTrgSentence.indexOf("<marker>");
+      final int expectedTrgMatchEnd = origBadTrgSentence.indexOf("</marker>")
+          - "<marker>".length();
+      testMarker(expectedTrgMatchStart, expectedTrgMatchEnd, rule, lang);
+
+      testBadSentence(origBadSrcSentence,
+          suggestedCorrection, expectedSrcMatchStart,
+          expectedSrcMatchEnd, rule.getSrcRule(),
+          lang,
+          srcTool);
+      testBadSentence(origBadTrgSentence,
+          suggestedCorrection, expectedTrgMatchStart,
+          expectedTrgMatchEnd, rule.getTrgRule(),
+          lang,
+          languageTool);
+    }
+
+    /* } else { // for multiple rules created with complex phrases
+
+      matches = getMatches(rule, badSentence, languageTool);
+      if (matches.length == 0
+          && !complexRules.containsKey(rule.getId() + badSentence)) {
+        complexRules.put(rule.getId() + badSentence, rule);
+      }
+
+      if (matches.length != 0) {
+        complexRules.put(rule.getId() + badSentence, null);
+        assertTrue(lang + ": Did expect one error in: \"" + badSentence
+            + "\" (Rule: " + rule + "), got " + matches.length,
+            matches.length == 1);
+        assertEquals(lang
+            + ": Incorrect match position markup (start) for rule " + rule,
+            expectedMatchStart, matches[0].getFromPos());
+        assertEquals(lang
+            + ": Incorrect match position markup (end) for rule " + rule,
+            expectedMatchEnd, matches[0].getToPos());
+        // make sure suggestion is what we expect it to be
+        if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+          assertTrue(
+              lang + ": Incorrect suggestions: "
+              + suggestedCorrection.toString() + " != "
+              + matches[0].getSuggestedReplacements() + " for rule "
+              + rule, suggestedCorrection.equals(matches[0]
+              .getSuggestedReplacements()));
+        }
+        // make sure the suggested correction doesn't produce an error:
+        if (matches[0].getSuggestedReplacements().size() > 0) {
+          final int fromPos = matches[0].getFromPos();
+          final int toPos = matches[0].getToPos();
+          for (final String repl : matches[0].getSuggestedReplacements()) {
+            final String fixedSentence = badSentence.substring(0, fromPos)
+                + repl + badSentence.substring(toPos);
+            matches = getMatches(rule, fixedSentence, languageTool);
+            assertEquals("Corrected sentence for rule " + rule
+                + " triggered error: " + fixedSentence, 0, matches.length);
+          }
+        } else {
+          noSuggestionCount++;
+        }
+      } */
+  }
+
+  /** Removes everything that looks like an XML tag from the given string. */
+  protected String cleanXML(final String str) {
+    return str.replaceAll("<([^<].*?)>", "");
+  }
+
+  /** Returns true if rule reports at least one match for the analyzed source/target sentence pair. */
+  private boolean match(final BitextPatternRule rule, final String src, final String trg,
+      final JLanguageTool srcLanguageTool,
+      final JLanguageTool trgLanguageTool) throws IOException {
+    final AnalyzedSentence srcText = srcLanguageTool.getAnalyzedSentence(src);
+    final AnalyzedSentence trgText = trgLanguageTool.getAnalyzedSentence(trg);
+    final RuleMatch[] matches = rule.match(srcText, trgText);
+    return matches.length > 0;
+  }
+
+  /** Analyzes sentence with the given tool and returns the matches rule reports for it. */
+  private RuleMatch[] getMatches(final Rule rule, final String sentence,
+      final JLanguageTool languageTool) throws IOException {
+    final AnalyzedSentence text = languageTool.getAnalyzedSentence(sentence);
+    final RuleMatch[] matches = rule.match(text);
+    return matches;
+  }
+
+  /**
+   * Test XML patterns, as a help for people developing rules that are not
+   * programmers.
+   */
+  public static void main(final String[] args) throws IOException {
+    final BitextPatternRuleTest prt = new BitextPatternRuleTest();
+    System.out.println("Running XML bitext pattern tests...");
+    prt.testBitextRulesFromXML();
+    System.out.println("Tests successful.");
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/DifferentLengthRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/DifferentLengthRuleTest.java
new file mode 100644
index 0000000..ab6cfc3
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/DifferentLengthRuleTest.java
@@ -0,0 +1,56 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import junit.framework.TestCase;
+
+public class DifferentLengthRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    DifferentLengthRule rule = new DifferentLengthRule();
+    // Every pair below has an English source sentence and a Polish target sentence.
+    RuleMatch[] matches;
+    JLanguageTool srcLangTool = new JLanguageTool(Language.ENGLISH);
+    JLanguageTool trgLangTool = new JLanguageTool(Language.POLISH);
+    rule.setSourceLang(Language.ENGLISH);
+    // correct sentences:
+    matches = rule.match(
+        srcLangTool.getAnalyzedSentence("This is a test sentence."),
+        trgLangTool.getAnalyzedSentence("To zdanie testowe."));
+    assertEquals(0, matches.length);
+
+    matches = rule.match(
+        srcLangTool.getAnalyzedSentence("Click this button."),
+        trgLangTool.getAnalyzedSentence("Kliknij ten przycisk."));
+    assertEquals(0, matches.length);
+
+    // incorrect sentences:
+    matches = rule.match(
+        srcLangTool.getAnalyzedSentence("Open a file, and check if it is corrupt."),
+        trgLangTool.getAnalyzedSentence("Otwórz plik."));
+    assertEquals(1, matches.length);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/FalseFriendsAsBitextLoaderTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/FalseFriendsAsBitextLoaderTest.java
new file mode 100644
index 0000000..08a104f
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/FalseFriendsAsBitextLoaderTest.java
@@ -0,0 +1,91 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRule;
+import de.danielnaber.languagetool.rules.patterns.bitext.FalseFriendsAsBitextLoader;
+
+/**
+ * @author Marcin Miłkowski
+ */
+public class FalseFriendsAsBitextLoaderTest extends TestCase {
+  // Loads the English/Polish false-friend rules as bitext rules and counts their matches on sentence pairs.
+  public void testHintsForPolishTranslators() throws IOException, ParserConfigurationException, SAXException {
+    JLanguageTool englishTool = new JLanguageTool(Language.ENGLISH, Language.POLISH);
+    JLanguageTool polishTool = new JLanguageTool(Language.POLISH);
+
+    FalseFriendsAsBitextLoader loader = new FalseFriendsAsBitextLoader();
+    final String fileName = "/false-friends.xml";
+    // the rule file is addressed relative to the rules directory reported by the data broker
+    final String rulesPath = JLanguageTool.getDataBroker().getRulesDir() + fileName;
+    final List<BitextPatternRule> falseFriends =
+        loader.getFalseFriendsAsBitext(rulesPath, Language.ENGLISH, Language.POLISH);
+
+    assertErrors(1, falseFriends, "This is an absurd.", "To absurd.", englishTool, polishTool);
+    assertErrors(1, falseFriends, "I have to speak to my advocate.", "Muszę porozmawiać z adwokatem.", englishTool, polishTool);
+    assertErrors(1, falseFriends, "This is not actual.", "To nie jest aktualne.", englishTool, polishTool);
+    assertErrors(0, falseFriends, "This is not actual.", "To nie jest rzeczywiste.", englishTool, polishTool);
+  }
+
+  private List<RuleMatch> check(final List<BitextPatternRule> bRules,
+      final String src, final String trg,
+      final JLanguageTool srcTool, final JLanguageTool trgTool) throws IOException {
+    final List<RuleMatch> collected = new ArrayList<RuleMatch>();
+    for (final BitextPatternRule candidate : bRules) {
+      final RuleMatch[] found = match(candidate, src, trg, srcTool, trgTool);
+      if (found == null) {
+        continue; // this rule returned no result array
+      }
+      for (final RuleMatch hit : found) {
+        collected.add(hit);
+      }
+    }
+    return collected;
+  }
+
+  private RuleMatch[] match(final BitextPatternRule rule, final String src, final String trg,
+      final JLanguageTool srcLanguageTool,
+      final JLanguageTool trgLanguageTool) throws IOException {
+    final AnalyzedSentence analyzedSource = srcLanguageTool.getAnalyzedSentence(src);
+    final AnalyzedSentence analyzedTarget = trgLanguageTool.getAnalyzedSentence(trg);
+    return rule.match(analyzedSource, analyzedTarget);
+  }
+
+  private void assertErrors(int errorCount,
+      final List<BitextPatternRule> rules,
+      final String src, final String trg, JLanguageTool srcTool, JLanguageTool trgTool) throws IOException {
+    final List<RuleMatch> reported = check(rules, src, trg, srcTool, trgTool);
+    // the total over all rules must equal the expected count
+    assertEquals(errorCount, reported.size());
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/SameTranslationRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/SameTranslationRuleTest.java
new file mode 100644
index 0000000..6809034
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/SameTranslationRuleTest.java
@@ -0,0 +1,57 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import junit.framework.TestCase;
+
+public class SameTranslationRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    final SameTranslationRule sameTranslation = new SameTranslationRule();
+    // English is the source language here, French the target language.
+    final JLanguageTool french = new JLanguageTool(Language.FRENCH);
+    final JLanguageTool english = new JLanguageTool(Language.ENGLISH);
+    sameTranslation.setSourceLang(Language.ENGLISH);
+
+    // target text differs from the source text: no match expected
+    final RuleMatch[] translated = sameTranslation.match(
+        english.getAnalyzedSentence("This is a test sentence."),
+        french.getAnalyzedSentence("C'est la vie !"));
+    assertEquals(0, translated.length);
+
+    // a proper name left unchanged must not be reported
+    final RuleMatch[] properName = sameTranslation.match(
+        english.getAnalyzedSentence("Elvis Presley"),
+        french.getAnalyzedSentence("Elvis Presley"));
+    assertEquals(0, properName.length);
+
+    // an ordinary sentence copied verbatim into the target: one match expected
+    final RuleMatch[] untranslated = sameTranslation.match(
+        english.getAnalyzedSentence("This this is a test sentence."),
+        french.getAnalyzedSentence("This this is a test sentence."));
+    assertEquals(1, untranslated.length);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRuleTest.java
new file mode 100644
index 0000000..5383ea3
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRuleTest.java
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.ca;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ *
+ * Simple tests for the rules/ca/AccentuacioReplaceRule class.
+ *
+ * @author Ionuț Păduraru
+ */
+public class AccentuacioReplaceRuleTest extends TestCase {
+
+ private AccentuacioReplaceRule rule;
+ private JLanguageTool langTool;
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ rule = new AccentuacioReplaceRule(TestTools.getMessages("ca"));
+ langTool = new JLanguageTool(Language.CATALAN);
+ }
+
+ public void testRule() throws IOException {
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Tot està bé.")).length);
+
+ // incorrect sentences:
+
+ // at the beginning of a sentence (the expected suggestion keeps the capital letter)
+ checkSimpleReplaceRule("Pneumonia vírica.", "Pneumònia");
+ // inside sentence
+ checkSimpleReplaceRule("Supercopa d'Europa de futbòl.", "futbol");
+ }
+
+ /**
+ * Check that exactly one match with exactly one suggested replacement is reported.
+ *
+ * @param sentence
+ * the sentence containing the incorrect/misspelled word.
+ * @param word
+ * the word that is correct (the expected single suggested replacement).
+ * @throws IOException if thrown while analyzing or matching the sentence
+ */
+ private void checkSimpleReplaceRule(String sentence, String word)
+ throws IOException {
+ RuleMatch[] matches;
+ matches = rule.match(langTool.getAnalyzedSentence(sentence));
+ assertEquals("Invalid matches.length while checking sentence: "
+ + sentence, 1, matches.length);
+ assertEquals("Invalid replacement count while checking sentence: "
+ + sentence, 1, matches[0].getSuggestedReplacements().size());
+ assertEquals("Invalid suggested replacement while checking sentence: "
+ + sentence, word, matches[0].getSuggestedReplacements().get(0));
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRuleTest.java
new file mode 100644
index 0000000..d77b935
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRuleTest.java
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.ca;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ *
+ * Simple tests for the rules/ca/CastellanismesReplaceRule class.
+ *
+ * @author Ionuț Păduraru
+ */
+public class CastellanismesReplaceRuleTest extends TestCase {
+
+ private CastellanismesReplaceRule rule;
+ private JLanguageTool langTool;
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ rule = new CastellanismesReplaceRule(TestTools.getMessages("ca"));
+ langTool = new JLanguageTool(Language.CATALAN);
+ }
+
+ public void testRule() throws IOException {
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Tot està bé.")).length);
+
+ // incorrect sentences:
+
+ // at the beginning of a sentence (the expected suggestion keeps the capital letter)
+ checkSimpleReplaceRule("Después de la mort de Lenin.", "Després");
+ // inside sentence
+ checkSimpleReplaceRule("Un any después.", "després");
+ }
+
+ /**
+ * Check that exactly one match with exactly one suggested replacement is reported.
+ *
+ * @param sentence
+ * the sentence containing the incorrect/misspelled word.
+ * @param word
+ * the word that is correct (the expected single suggested replacement).
+ * @throws IOException if thrown while analyzing or matching the sentence
+ */
+ private void checkSimpleReplaceRule(String sentence, String word)
+ throws IOException {
+ RuleMatch[] matches;
+ matches = rule.match(langTool.getAnalyzedSentence(sentence));
+ assertEquals("Invalid matches.length while checking sentence: "
+ + sentence, 1, matches.length);
+ assertEquals("Invalid replacement count while checking sentence: "
+ + sentence, 1, matches[0].getSuggestedReplacements().size());
+ assertEquals("Invalid suggested replacement while checking sentence: "
+ + sentence, word, matches[0].getSuggestedReplacements().get(0));
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/AgreementRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/AgreementRuleTest.java
new file mode 100644
index 0000000..a396a4d
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/AgreementRuleTest.java
@@ -0,0 +1,190 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+import java.util.List;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/** Tests for the German AgreementRule on determiner/noun and determiner/adjective/noun phrases.
+ * @author Daniel Naber
+ */
+public class AgreementRuleTest extends TestCase {
+
+ private AgreementRule rule;
+ private JLanguageTool langTool;
+
+ public void setUp() throws IOException {
+ rule = new AgreementRule(null);
+ langTool = new JLanguageTool(Language.GERMAN);
+ }
+
+ public void testDetNounRule() throws IOException {
+
+ /* debugging:
+ RuleMatch[] rm = rule.match(langTool.getAnalyzedSentence("Wer für die Kosten"));
+ System.err.println(rm[0]);
+ if (true)
+ return;
+ */
+
+ // correct sentences:
+ assertGood("So ist es in den USA.");
+ assertGood("Das ist der Tisch.");
+ assertGood("Das ist das Haus.");
+ assertGood("Das ist die Frau.");
+ assertGood("Das ist das Auto der Frau.");
+ assertGood("Das gehört dem Mann.");
+ assertGood("Das Auto des Mannes.");
+ assertGood("Das interessiert den Mann.");
+ assertGood("Das interessiert die Männer.");
+ assertGood("Das Auto von einem Mann.");
+ assertGood("Das Auto eines Mannes.");
+ assertGood("Des großen Mannes.");
+
+ assertGood("Das Dach von meinem Auto.");
+ assertGood("Das Dach von meinen Autos.");
+
+ assertGood("Das Dach meines Autos.");
+ assertGood("Das Dach meiner Autos.");
+
+ assertGood("Das Dach meines großen Autos.");
+ assertGood("Das Dach meiner großen Autos.");
+
+ assertGood("Das Wahlrecht, das Frauen damals zugesprochen bekamen.");
+ assertGood("Es war Karl, dessen Leiche Donnerstag gefunden wurde.");
+
+ assertGood("Erst recht ich Arbeiter.");
+ assertGood("Erst recht wir Arbeiter.");
+ assertGood("Erst recht wir fleißigen Arbeiter.");
+
+ assertGood("Dann lud er Freunde ein.");
+ assertGood("Dann lud sie Freunde ein.");
+ assertGood("Aller Kommunikation liegt dies zugrunde.");
+ assertGood("Pragmatisch wählt man solche Formeln als Axiome.");
+ assertGood("Der eine Polizist rief dem anderen zu...");
+ assertGood("Das eine Kind rief dem anderen zu...");
+ assertGood("Er wollte seine Interessen wahrnehmen.");
+
+ assertGood("... wo Krieg den Unschuldigen Leid und Tod bringt.");
+ assertGood("Der Abschuss eines Papageien.");
+
+ // relative clauses:
+ assertGood("Das Recht, das Frauen eingeräumt wird.");
+ assertGood("Der Mann, in dem quadratische Fische schwammen.");
+ assertGood("Gutenberg, der quadratische Mann.");
+ // TODO: not detected, because "die" is considered a relative pronoun:
+ //assertBad("Gutenberg, die Genie.");
+
+ // some of these used to cause false alarms:
+ assertGood("Das Münchener Fest.");
+ assertGood("Das Münchner Fest.");
+ assertGood("Die Planung des Münchener Festes.");
+ assertGood("Das Berliner Wetter.");
+ assertGood("Den Berliner Arbeitern ist das egal.");
+ assertGood("Das Haus des Berliner Arbeiters.");
+ assertGood("Es gehört dem Berliner Arbeiter.");
+ assertGood("Das Stuttgarter Auto.");
+ assertGood("Das Bielefelder Radio.");
+ assertGood("Das Gütersloher Radio.");
+
+ // incorrect sentences:
+ assertBad("Es sind die Tisch.");
+ assertBad("Es sind das Tisch.");
+ assertBad("Es sind die Haus.");
+ assertBad("Es sind der Haus.");
+ assertBad("Es sind das Frau.");
+ assertBad("Das Auto des Mann.");
+ assertBad("Das interessiert das Mann.");
+ assertBad("Das interessiert die Mann.");
+ assertBad("Das Auto ein Mannes.");
+ assertBad("Das Auto einem Mannes.");
+ assertBad("Das Auto einer Mannes.");
+ assertBad("Das Auto einen Mannes.");
+
+ assertBad("Des großer Mannes.");
+
+ assertBad("Das Dach von meine Auto.");
+ assertBad("Das Dach von meinen Auto.");
+
+ assertBad("Das Dach mein Autos.");
+ assertBad("Das Dach meinem Autos.");
+
+ assertBad("Das Dach meinem großen Autos.");
+ assertBad("Das Dach mein großen Autos.");
+
+ assertBad("Erst recht wir fleißiges Arbeiter.");
+
+ // TODO: not yet detected:
+ //assertBad("Erst recht ich fleißiges Arbeiter.");
+ //assertBad("Das Dach meine großen Autos.");
+ //assertBad("Das Dach meinen großen Autos.");
+ //assertBad("Das Dach meine Autos.");
+ //assertBad("Es ist das Haus dem Mann.");
+ //assertBad("Das interessiert der Männer.");
+ //assertBad("Das interessiert der Mann.");
+ //assertBad("Das gehört den Mann.");
+ //assertBad("Es sind der Frau.");
+ }
+
+ public void testRegression() throws IOException {
+ JLanguageTool gramCheckerEngine = new JLanguageTool(Language.GERMAN);
+ gramCheckerEngine.activateDefaultPatternRules();
+ // regression check: this error used to go undetected (original note: "> 1.0.1"):
+ String str = "Und so.\r\nDie Bier.";
+ List<RuleMatch> matches = gramCheckerEngine.check(str);
+ assertEquals(1, matches.size());
+ }
+
+ public void testDetAdjNounRule() throws IOException {
+ // correct sentences:
+ assertGood("Das ist der riesige Tisch.");
+ assertGood("Der riesige Tisch ist groß.");
+ assertGood("Die Kanten der der riesigen Tische.");
+ assertGood("Den riesigen Tisch mag er.");
+ assertGood("Es mag den riesigen Tisch.");
+ assertGood("Die Kante des riesigen Tisches.");
+ assertGood("Dem riesigen Tisch fehlt was.");
+ assertGood("Die riesigen Tische sind groß.");
+ assertGood("Der riesigen Tische wegen.");
+ // TODO: false alarm, the following correct sentence is currently flagged as incorrect:
+ // Dann hat das natürlich Nachteile.
+
+ // incorrect sentences:
+ assertBad("Es sind die riesigen Tisch.");
+ //assertBad("Dort, die riesigen Tischs!"); // TODO: error not detected because of comma
+ assertBad("Als die riesigen Tischs kamen.");
+ assertBad("Als die riesigen Tisches kamen.");
+ // TODO: not yet detected:
+ //assertBad("Der riesigen Tisch und so.");
+ }
+
+ private void assertGood(String s) throws IOException { // passes when the rule reports no match for s
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence(s)).length);
+ }
+
+ private void assertBad(String s) throws IOException { // passes when the rule reports exactly one match for s
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence(s)).length);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CaseRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CaseRuleTest.java
new file mode 100644
index 0000000..bbabdc1
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CaseRuleTest.java
@@ -0,0 +1,116 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+
+/** Tests for the German CaseRule: each sentence is expected to yield either zero matches or exactly one.
+ * @author Daniel Naber
+ */
+public class CaseRuleTest extends TestCase {
+
+ public void testRule() throws IOException {
+ CaseRule rule = new CaseRule(null);
+ JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Ein einfacher Satz zum Testen.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das Laufen fällt mir leicht.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das Winseln stört.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das schlägt nicht so zu Buche.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Dirk Hetzel ist ein Name.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Sein Verhalten war okay.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. \"Ein Zitat.\"")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. 'Ein Zitat.'")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. «Ein Zitat.»")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. »Ein Zitat.«")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. (Noch einer.)")).length);
+ // works only thanks to addex.txt:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Der Nachfahre.")).length);
+ // both can be correct:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz, \"Ein Zitat.\"")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz, \"ein Zitat.\"")).length);
+ // Exception 'Le':
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Schon Le Monde schrieb das.")).length);
+ // unknown word:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("In Blubberdorf macht man das so.")).length);
+
+ // sentences that used to trigger an error because of incorrect compound tokenization:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das sind Euroscheine.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("John Stallman isst.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist die neue Gesellschafterin hier.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist die neue Dienerin hier.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist die neue Geigerin hier.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die ersten Gespanne erreichen Köln.")).length);
+
+ // used to trigger error because of wrong POS tagging:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die Schlinge zieht sich zu.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die Schlingen ziehen sich zu.")).length);
+
+ // TODO: upper/lower case after a colon is currently not checked, so both variants pass:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist es: kein Satz.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist es: Kein Satz.")).length);
+
+ // incorrect sentences:
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Ein Einfacher Satz zum Testen.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das Winseln Stört.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Sein verhalten war okay.")).length);
+ }
+
+ public void testSubstantivierteVerben() throws IOException {
+ CaseRule rule = new CaseRule(null);
+ JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das fahrende Auto.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das Fahren ist einfach.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Denn das Fahren ist einfach.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das können wir so machen.")).length);
+ // incorrect sentences:
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das fahren ist einfach.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das fahren ist einfach.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das laufen ist einfach.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das essen ist einfach.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das gehen ist einfach.")).length);
+ }
+
+ public void testPhraseExceptions() throws IOException {
+ CaseRule rule = new CaseRule(null);
+ JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne Wenn und Aber.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("ohne Wenn und Aber")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne Wenn und Aber bla blubb.")).length);
+ // as long as phrase exception isn't complete, there's no error:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne wenn")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne wenn und")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("wenn und aber")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("und aber")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("aber")).length);
+ // incorrect sentences:
+ // error not found here as it's in the XML rules:
+ //assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das gilt ohne wenn und aber.")).length);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CompoundRuleTest.java
new file mode 100644
index 0000000..caabdce
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CompoundRuleTest.java
@@ -0,0 +1,88 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.CompoundRuleTestAbs;
+
+/** Tests for the German CompoundRule; check(...), langTool and rule are presumably inherited from CompoundRuleTestAbs (TODO confirm).
+ * @author Daniel Naber
+ */
+public class CompoundRuleTest extends CompoundRuleTestAbs {
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ langTool = new JLanguageTool(Language.GERMAN);
+ rule = new CompoundRule(null);
+ }
+
+ public void testRule() throws IOException {
+ // correct sentences:
+ check(0, "Eine tolle CD-ROM");
+ check(0, "Eine tolle CD-ROM.");
+ check(0, "Ein toller CD-ROM-Test.");
+ check(0, "Systemadministrator");
+ check(0, "System-Administrator");
+ check(0, "Eine Million Dollar");
+ check(0, "Das System des Administrators");
+ check(0, "Nur im Stand-by-Betrieb");
+ check(0, "Start, Ziel, Sieg");
+ check(0, "Roll-on-roll-off-Schiff");
+ // incorrect sentences:
+ check(1, "System Administrator", new String[]{"System-Administrator", "Systemadministrator"});
+ check(1, "bla bla bla bla bla System Administrator bla bla bla bla bla");
+ check(1, "System Administrator blubb");
+ check(1, "Der System Administrator");
+ check(1, "Der dumme System Administrator");
+ check(1, "CD ROM", new String[]{"CD-ROM"});
+ check(1, "Nur im Stand by Betrieb", new String[]{"Stand-by-Betrieb"});
+ check(1, "Ein echter Start Ziel Sieg", new String[]{"Start-Ziel-Sieg"});
+ check(1, "Ein echter Start Ziel Sieg.");
+ check(1, "Ein Start Ziel Sieg");
+ check(1, "Start Ziel Sieg");
+ check(1, "Start Ziel Sieg!");
+ check(2, "Der dumme System Administrator legt die CD ROM");
+ check(2, "Der dumme System Administrator legt die CD ROM.");
+ check(2, "Der dumme System Administrator legt die CD ROM ein blah");
+ check(2, "System Administrator CD ROM");
+ //FIXME: suggestions / longest match (check below is disabled until then)
+ //check(1, "Roll on roll off Schiff", new String[]{"Roll-on-roll-off-Schiff"});
+ check(1, "Spin off");
+ // no hyphen suggestion for some words (only the joined form is expected):
+ check(1, "Das ist Haar sträubend", new String[]{"Haarsträubend"});
+ // only the hyphenated suggestion is expected for some words:
+ check(1, "Reality TV", new String[]{"Reality-TV"});
+ check(1, "Spin off", new String[]{"Spin-off"});
+ // also accept incorrect upper/lowercase spelling:
+ check(1, "Spin Off", new String[]{"Spin-Off"});
+ check(1, "CW Wert", new String[]{"CW-Wert"});
+ // also detect an error if only some of the hyphens are missing:
+ check(1, "Roll-on-roll-off Schiff", new String[]{"Roll-on-roll-off-Schiff"});
+ check(1, "E-Mail Adressen", new String[]{"E-Mail-Adressen"});
+ // first part is a single character:
+ check(0, "x-mal");
+ check(1, "x mal", new String[]{"x-mal"});
+ check(0, "y-Achse");
+ check(1, "y Achse", new String[]{"y-Achse"});
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/DashRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/DashRuleTest.java
new file mode 100644
index 0000000..5dacdd9
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/DashRuleTest.java
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+
+/** Tests for the German DashRule: a hyphen followed by a space before the next compound part is expected to yield one match.
+ * @author Daniel Naber
+ */
+public class DashRuleTest extends TestCase {
+
+ public void testRule() throws IOException {
+ DashRule rule = new DashRule(null);
+ JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten-Erhöhung kam dann doch.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diätenerhöhung kam dann doch.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten-Erhöhungs-Manie kam dann doch.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten- und Gehaltserhöhung kam dann doch.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten- sowie Gehaltserhöhung kam dann doch.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten- oder Gehaltserhöhung kam dann doch.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Erst so - Karl-Heinz dann blah.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Erst so -- Karl-Heinz aber...")).length);
+
+ // incorrect sentences (NOTE(review): the first two assertions are identical as shown - confirm whether a variant was intended):
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten- Erhöhung kam dann doch.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten- Erhöhung kam dann doch.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten-Erhöhungs- Manie kam dann doch.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten- Erhöhungs-Manie kam dann doch.")).length);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WiederVsWiderRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WiederVsWiderRuleTest.java
new file mode 100644
index 0000000..4482771
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WiederVsWiderRuleTest.java
@@ -0,0 +1,49 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+
+/** Tests for the German WiederVsWiderRule using sentences built around "spiegelt ... wider" / "spiegelt ... wieder".
+ * @author Daniel Naber
+ */
+public class WiederVsWiderRuleTest extends TestCase {
+
+ public void testRule() throws IOException {
+ WiederVsWiderRule rule = new WiederVsWiderRule(null);
+ JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das spiegelt wider, wie es wieder läuft.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das spiegelt die Situation gut wider.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das spiegelt die Situation.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Immer wieder spiegelt das die Situation.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Immer wieder spiegelt das die Situation wider.")).length);
+ // known false alarm: the rule matches although the sentence is okay, so this check stays disabled:
+ //assertEquals(0, rule.match(langTool.getAnalyzedText("Das spiegelt wieder wider, wie es läuft.")).length);
+ // errors:
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das spiegelt wieder, wie es wieder läuft.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das spiegelt die Situation gut wieder.")).length);
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Immer wieder spiegelt das die Situation wieder.")).length);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WordCoherencyRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WordCoherencyRuleTest.java
new file mode 100644
index 0000000..c3465f4
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WordCoherencyRuleTest.java
@@ -0,0 +1,87 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+import java.util.List;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Daniel Naber
+ */
+public class WordCoherencyRuleTest extends TestCase {
+
+ /**
+ * Checks single sentences that use the spellings "aufwendig" and
+ * "aufwändig": a sentence using only one spelling must yield no match,
+ * a sentence mixing both must yield exactly one match.
+ */
+ public void testRule() throws IOException {
+ final WordCoherencyRule rule = new WordCoherencyRule(null);
+ final JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist aufwendig, aber nicht zu aufwendig.")).length);
+ // WordCoherencyRule keeps its state to check more than one sentence, so
+ // the rule is reset before each further, independent check:
+ rule.reset();
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist aufwändig, aber nicht zu aufwändig.")).length);
+ // errors:
+ rule.reset();
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das ist aufwendig, aber nicht zu aufwändig.")).length);
+ rule.reset();
+ assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das ist aufwändig, aber nicht zu aufwendig.")).length);
+ }
+
+ /**
+  * Checks complete multi-sentence texts through {@code JLanguageTool.check}
+  * and asserts the total number of reported matches for each text.
+  */
+ public void testRuleCompleteTexts() throws IOException {
+   // disabled check, kept for reference:
+   //assertCheckCount(0, langTool, "Das ist aufwendig. Aber hallo. Es ist wirklich aufwendig.");
+   final JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
+   // complete texts:
+   assertCheckCount(0, langTool, "Das ist aufwändig. Aber hallo. Es ist wirklich aufwändig.");
+   assertCheckCount(1, langTool, "Das ist aufwendig. Aber hallo. Es ist wirklich aufwändig.");
+   assertCheckCount(1, langTool, "Das ist aufwändig. Aber hallo. Es ist wirklich aufwendig.");
+
+   // also find full forms:
+   assertCheckCount(0, langTool, "Das ist aufwendig. Aber hallo. Es ist wirklich aufwendiger als...");
+   assertCheckCount(1, langTool, "Das ist aufwendig. Aber hallo. Es ist wirklich aufwändiger als...");
+   assertCheckCount(1, langTool, "Das ist aufwändig. Aber hallo. Es ist wirklich aufwendiger als...");
+   assertCheckCount(1, langTool, "Das ist das aufwändigste. Aber hallo. Es ist wirklich aufwendiger als...");
+   assertCheckCount(1, langTool, "Das ist das aufwändigste. Aber hallo. Es ist wirklich aufwendig.");
+
+   // cross-paragraph checks
+   assertCheckCount(1, langTool, "Das ist das aufwändigste.\n\nAber hallo. Es ist wirklich aufwendig.");
+ }
+
+ /**
+  * Runs {@code tool.check(text)} and asserts that it returns {@code expected} matches.
+  */
+ private void assertCheckCount(int expected, JLanguageTool tool, String text) throws IOException {
+   final List<RuleMatch> found = tool.check(text);
+   assertEquals(expected, found.size());
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/AvsAnRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/AvsAnRuleTest.java
new file mode 100644
index 0000000..f9cfee1
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/AvsAnRuleTest.java
@@ -0,0 +1,167 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Daniel Naber
+ */
+public class AvsAnRuleTest extends TestCase {
+
+ /**
+  * Applies {@link AvsAnRule} to English sentences one at a time and checks the
+  * number of matches reported for each sentence.
+  */
+ public void testRule() throws IOException {
+   AvsAnRule rule = new AvsAnRule(null);
+   JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+   // correct sentences:
+   assertMatchCount(0, "This is a test sentence.", rule, langTool);
+   assertMatchCount(0, "It was an hour ago.", rule, langTool);
+   assertMatchCount(0, "A university is ...", rule, langTool);
+   assertMatchCount(0, "A one-way street ...", rule, langTool);
+   assertMatchCount(0, "An hour's work ...", rule, langTool);
+   assertMatchCount(0, "Going to an \"industry party\".", rule, langTool);
+   assertMatchCount(0, "An 8-year old boy ...", rule, langTool);
+   assertMatchCount(0, "An 18-year old boy ...", rule, langTool);
+   assertMatchCount(0, "The A-levels are ...", rule, langTool);
+   assertMatchCount(0, "An NOP check ...", rule, langTool);
+   assertMatchCount(0, "A USA-wide license ...", rule, langTool);
+   assertMatchCount(0, "...asked a UN member.", rule, langTool);
+   assertMatchCount(0, "In an un-united Germany...", rule, langTool);
+
+   // fixed false alarms:
+   assertMatchCount(0, "Here, a and b are supplementary angles.", rule, langTool);
+   assertMatchCount(0, "The Qur'an was translated into Polish.", rule, langTool);
+
+   // errors:
+   assertMatchCount(1, "It was a hour ago.", rule, langTool);
+   assertMatchCount(1, "It was an sentence that's long.", rule, langTool);
+   assertMatchCount(1, "It was a uninteresting talk.", rule, langTool);
+   assertMatchCount(1, "An university", rule, langTool);
+   assertMatchCount(1, "A unintersting ...", rule, langTool);
+   assertMatchCount(2, "It was a uninteresting talk with an long sentence.", rule, langTool);
+   assertMatchCount(1, "A hour's work ...", rule, langTool);
+   assertMatchCount(1, "Going to a \"industry party\".", rule, langTool);
+   // with uppercase letters:
+   assertMatchCount(0, "A University", rule, langTool);
+   assertMatchCount(0, "A Europe wide something", rule, langTool);
+   assertMatchCount(1, "then an University sdoj fixme sdoopsd", rule, langTool);
+   assertMatchCount(1, "A 8-year old boy ...", rule, langTool);
+   assertMatchCount(1, "A 18-year old boy ...", rule, langTool);
+   assertMatchCount(1, "...asked an UN member.", rule, langTool);
+   assertMatchCount(1, "In a un-united Germany...", rule, langTool);
+
+   // acronyms/initials:
+   assertMatchCount(0, "A. R.J. Turgot", rule, langTool);
+
+   // mixed case as dictionary-based exception:
+   assertMatchCount(0, "Anyone for an MSc?", rule, langTool);
+   assertMatchCount(1, "Anyone for a MSc?", rule, langTool);
+   // mixed case from general case:
+   assertMatchCount(0, "Anyone for an XMR-based writer?", rule, langTool);
+
+   // apostrophes:
+   assertMatchCount(0, "Its name in English is a[1] (), plural A's, As, as, or a's.", rule, langTool);
+ }
+
+ /**
+  * Analyzes {@code sentence}, applies {@code rule} to it and asserts that
+  * exactly {@code expected} matches are returned.
+  */
+ private void assertMatchCount(int expected, String sentence, AvsAnRule rule,
+     JLanguageTool langTool) throws IOException {
+   final RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(sentence));
+   assertEquals(expected, found.length);
+ }
+
+ /**
+  * Checks the strings returned by {@code AvsAnRule.suggestAorAn} for a few words.
+  */
+ public void testSuggestions() throws IOException {
+   AvsAnRule rule = new AvsAnRule(null);
+   // each row: { word passed to suggestAorAn, expected return value }
+   final String[][] expectations = {
+     {"string", "a string"},
+     {"university", "a university"},
+     {"hour", "an hour"},
+     {"all-terrain", "an all-terrain"},
+     {"UNESCO", "a UNESCO"},
+   };
+   for (int i = 0; i < expectations.length; i++) {
+     assertEquals(expectations[i][1], rule.suggestAorAn(expectations[i][0]));
+   }
+ }
+
+ /**
+  * Checks the from/to positions of the first match that {@link AvsAnRule}
+  * reports for sentences with and without quotes around the article.
+  */
+ public void testPositions() throws IOException {
+   AvsAnRule rule = new AvsAnRule(null);
+   JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+   // no quotes etc.:
+   assertFirstMatchSpan(0, 1, "a industry standard.", rule, langTool);
+
+   // quotes..
+   assertFirstMatchSpan(0, 1, "a \"industry standard\".", rule, langTool);
+   assertFirstMatchSpan(0, 1, "a - industry standard\".", rule, langTool);
+   assertFirstMatchSpan(8, 9, "This is a \"industry standard\".", rule, langTool);
+   assertFirstMatchSpan(1, 2, "\"a industry standard\".", rule, langTool);
+   assertFirstMatchSpan(18, 19, "\"Many say this is a industry standard\".", rule, langTool);
+   assertFirstMatchSpan(11, 13,
+       "Like many \"an desperado\" before him, Bart headed south into Mexico.", rule, langTool);
+ }
+
+ /**
+  * Applies {@code rule} to {@code sentence} and asserts the span of the first match.
+  * Fails with a readable message when the rule reports no match at all, instead
+  * of running into an ArrayIndexOutOfBoundsException on {@code matches[0]}.
+  *
+  * @param expectedFrom expected value of {@code getFromPos()} of the first match
+  * @param expectedTo expected value of {@code getToPos()} of the first match
+  */
+ private void assertFirstMatchSpan(int expectedFrom, int expectedTo, String sentence,
+     AvsAnRule rule, JLanguageTool langTool) throws IOException {
+   final RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(sentence));
+   assertTrue("Expected at least one match for: " + sentence, matches.length > 0);
+   assertEquals(expectedFrom, matches[0].getFromPos());
+   assertEquals(expectedTo, matches[0].getToPos());
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/CompoundRuleTest.java
new file mode 100644
index 0000000..0505a05
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/CompoundRuleTest.java
@@ -0,0 +1,46 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.CompoundRuleTestAbs;
+
+/**
+ * @author Daniel Naber
+ */
+public class CompoundRuleTest extends CompoundRuleTestAbs {
+
+  /**
+   * Sets the inherited {@code langTool} and {@code rule} fields to an English
+   * {@link JLanguageTool} and a fresh {@link CompoundRule}.
+   */
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    langTool = new JLanguageTool(Language.ENGLISH);
+    rule = new CompoundRule(null);
+  }
+
+  /**
+   * Runs the inherited {@code check} helper on hyphenated and unhyphenated input.
+   */
+  public void testRule() throws IOException {
+    // correct sentences, no match expected:
+    check(0, "The software supports case-sensitive search.");
+    check(0, "He is one-year-old.");
+    // incorrect input, one match expected; the array is presumably the list of
+    // expected suggestions (see CompoundRuleTestAbs.check to confirm):
+    final String[] expected = {"case-sensitive"};
+    check(1, "case sensitive", expected);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRuleTest.java
new file mode 100644
index 0000000..c245b80
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRuleTest.java
@@ -0,0 +1,151 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.IOException;
+import java.util.List;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import junit.framework.TestCase;
+
+public class EnglishUnpairedBracketsRuleTest extends TestCase {
+
+ /**
+  * Applies one {@link EnglishUnpairedBracketsRule} instance to a series of
+  * sentences and checks the number of matches for each. The same rule instance
+  * is used for every sentence, so the order of the checks is kept as is.
+  */
+ public void testRule() throws IOException {
+   EnglishUnpairedBracketsRule rule =
+       new EnglishUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.ENGLISH);
+   JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+   // correct sentences:
+   assertMatchCount(0, "(This is a test sentence).", rule, langTool);
+   assertMatchCount(0, "This is a word 'test'.", rule, langTool);
+   assertMatchCount(0, "This is the joint presidents' declaration.", rule, langTool);
+   assertMatchCount(0, "The screen is 20\" wide.", rule, langTool);
+   assertMatchCount(0, "This is a [test] sentence...", rule, langTool);
+   assertMatchCount(0,
+       "The plight of Tamil refugees caused a surge of support from most of the Tamil political parties.[90]",
+       rule, langTool);
+   assertMatchCount(0,
+       "This is what he said: \"We believe in freedom. This is what we do.\"", rule, langTool);
+   assertMatchCount(0, "(([20] [20] [20]))", rule, langTool);
+   // test for a case that created a false alarm after disambiguation
+   assertMatchCount(0, "This is a \"special test\", right?", rule, langTool);
+   // numerical bullets
+   assertMatchCount(0, "We discussed this in Chapter 1).", rule, langTool);
+   assertMatchCount(0,
+       "The jury recommended that: (1) Four additional deputies be employed.", rule, langTool);
+   assertMatchCount(0, "We discussed this in section 1a).", rule, langTool);
+   assertMatchCount(0, "We discussed this in section iv).", rule, langTool);
+
+   // inches exception shouldn't match " here:
+   assertMatchCount(0,
+       "In addition, the government would pay a $1,000 \"cost of education\" grant to the schools.",
+       rule, langTool);
+
+   assertMatchCount(0,
+       "Paradise lost to the alleged water needs of Texas' big cities Thursday.", rule, langTool);
+
+   assertMatchCount(0, "Kill 'em all!", rule, langTool);
+
+   assertMatchCount(0, "Puttin' on the Ritz", rule, langTool);
+
+   // incorrect sentences:
+   assertMatchCount(1, "(This is a test sentence.", rule, langTool);
+
+   // tests for Edward's bug
+   assertMatchCount(1, "This is a test with an apostrophe &'.", rule, langTool);
+   assertMatchCount(1, "&'", rule, langTool);
+   assertMatchCount(1, "!'", rule, langTool);
+   assertMatchCount(1, "What?'", rule, langTool);
+   // mismatched bracket and quote types
+   assertMatchCount(2, "(This is a test” sentence.", rule, langTool);
+   assertMatchCount(1, "This is a {test sentence.", rule, langTool);
+   assertMatchCount(3, "This [is (a test} sentence.", rule, langTool);
+ }
+
+ /**
+  * Analyzes {@code sentence}, applies {@code rule} to it and asserts that
+  * exactly {@code expected} matches are returned.
+  */
+ private void assertMatchCount(int expected, String sentence, EnglishUnpairedBracketsRule rule,
+     JLanguageTool langTool) throws IOException {
+   final RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(sentence));
+   assertEquals(expected, found.length);
+ }
+
+ /**
+  * Checks multi-sentence texts through {@code JLanguageTool.check} with the
+  * rule "EN_UNPAIRED_BRACKETS" enabled and asserts the number of matches.
+  */
+ public void testMultipleSentences() throws IOException {
+   final JLanguageTool tool = new JLanguageTool(Language.ENGLISH);
+   tool.enableRule("EN_UNPAIRED_BRACKETS");
+
+   // bracket closed in a later sentence: no match expected
+   final List<RuleMatch> closedLater = tool.check(
+       "This is multiple sentence text that contains a bracket:"
+       + "[This is bracket. With some text.] and this continues.\n");
+   assertEquals(0, closedLater.size());
+
+   // bracket never closed: one match expected
+   final List<RuleMatch> neverClosed = tool.check(
+       "This is multiple sentence text that contains a bracket:"
+       + "[This is bracket. With some text. And this continues.\n\n");
+   assertEquals(1, neverClosed.size());
+
+   // now with a paragraph end inside - we get two alarms because of paragraph
+   // resetting
+   final List<RuleMatch> splitByParagraph = tool.check(
+       "This is multiple sentence text that contains a bracket. "
+       + "(This is bracket. \n\n With some text.) and this continues.");
+   assertEquals(2, splitByParagraph.size());
+ }
+
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/es/ElwithFemRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/es/ElwithFemRuleTest.java
new file mode 100644
index 0000000..7163ed2
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/es/ElwithFemRuleTest.java
@@ -0,0 +1,74 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.es;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Susana Sotelo Docio
+ *
+ * based on English tests
+ */
+public class ElwithFemRuleTest extends TestCase {
+
+  /**
+   * Applies {@link ElwithFemRule} to Spanish sentences one at a time and checks
+   * the number of matches reported for each sentence.
+   */
+  public void testRule() throws IOException {
+    ElwithFemRule rule = new ElwithFemRule(null);
+    JLanguageTool langTool = new JLanguageTool(Language.SPANISH);
+    // correct sentences:
+    assertMatchCount(0, "El alma inmortal.", rule, langTool);
+    assertMatchCount(0, "Tomaré un agua.", rule, langTool);
+    assertMatchCount(0, "Usa mejor el hacha.", rule, langTool);
+    assertMatchCount(0, "Lo escondí bajo el haya.", rule, langTool);
+    assertMatchCount(0, "La foto del \"aura\" se la debo a él.", rule, langTool);
+    assertMatchCount(0, "Encontraron un ánfora ...", rule, langTool);
+    assertMatchCount(0, "Ningún acta ...", rule, langTool);
+    // errors:
+    assertMatchCount(1, "La alma inmortal.", rule, langTool);
+    assertMatchCount(1, "Tomaré una agua.", rule, langTool);
+    assertMatchCount(1, "Usa mejor la hacha.", rule, langTool);
+    assertMatchCount(1, "Lo escondí bajo la haya.", rule, langTool);
+    assertMatchCount(1, "La foto de la \"aura\" se la debo a él.", rule, langTool);
+    assertMatchCount(1, "Ninguna acta ...", rule, langTool);
+    // with uppercase letters:
+    assertMatchCount(0, "En La Haya se vive muy bien.", rule, langTool);
+    // with accented chars (disabled):
+    //assertMatchCount(1, "Encontraron una ánfora ...", rule, langTool);
+  }
+
+  /**
+   * Analyzes {@code sentence}, applies {@code rule} to it and asserts that
+   * exactly {@code expected} matches are returned.
+   */
+  private void assertMatchCount(int expected, String sentence, ElwithFemRule rule,
+      JLanguageTool langTool) throws IOException {
+    final RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(sentence));
+    assertEquals(expected, found.length);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRuleTest.java
new file mode 100644
index 0000000..159e4d0
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRuleTest.java
@@ -0,0 +1,75 @@
+ /* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.fr;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+
+/**
+ * @author Marcin Miłkowski
+ */
+
+public class QuestionWhitespaceRuleTest extends TestCase {
+
+
+
+ public final void testRule() throws IOException {
+ QuestionWhitespaceRule rule = new QuestionWhitespaceRule(TestTools.getEnglishMessages());
+ RuleMatch[] matches;
+ JLanguageTool langTool = new JLanguageTool(Language.FRENCH);
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("C'est vrai !")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Qu'est ce que c'est ?")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("L'enjeu de ce livre est donc triple : philosophique")).length);
+
+ // errors:
+ matches = rule.match(langTool.getAnalyzedSentence("C'est vrai!"));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("C'est vrai !"));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("Qu'est ce que c'est ?"));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("Qu'est ce que c'est?"));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("L'enjeu de ce livre est donc triple: philosophique;"));
+ assertEquals(2, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("L'enjeu de ce livre est donc triple: philosophique ;"));
+ assertEquals(2, matches.length);
+ // check match positions:
+ assertEquals(2, matches.length);
+ assertEquals(29, matches[0].getFromPos());
+ assertEquals(36, matches[0].getToPos());
+ assertEquals(50, matches[1].getFromPos());
+ assertEquals(52, matches[1].getToPos());
+ //guillemets
+ matches = rule.match(langTool.getAnalyzedSentence("Le guillemet ouvrant est suivi d'un espace insécable : « mais le lieu [...] et le guillemet fermant est précédé d'un espace insécable : [...] littérature »."));
+ assertEquals(2, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("Le guillemet ouvrant est suivi d'un espace insécable : «mais le lieu [...] et le guillemet fermant est précédé d'un espace insécable : [...] littérature»."));
+ assertEquals(2, matches.length);
+ }
+
+ }
+
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/AvsAnRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/AvsAnRuleTest.java
new file mode 100644
index 0000000..f9cfee1
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/AvsAnRuleTest.java
@@ -0,0 +1,167 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Daniel Naber
+ */
+public class AvsAnRuleTest extends TestCase {
+
+ /**
+  * Applies {@link AvsAnRule} to English sentences one at a time and checks the
+  * number of matches reported for each sentence.
+  */
+ public void testRule() throws IOException {
+   AvsAnRule rule = new AvsAnRule(null);
+   JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+   // correct sentences:
+   assertMatchCount(0, "This is a test sentence.", rule, langTool);
+   assertMatchCount(0, "It was an hour ago.", rule, langTool);
+   assertMatchCount(0, "A university is ...", rule, langTool);
+   assertMatchCount(0, "A one-way street ...", rule, langTool);
+   assertMatchCount(0, "An hour's work ...", rule, langTool);
+   assertMatchCount(0, "Going to an \"industry party\".", rule, langTool);
+   assertMatchCount(0, "An 8-year old boy ...", rule, langTool);
+   assertMatchCount(0, "An 18-year old boy ...", rule, langTool);
+   assertMatchCount(0, "The A-levels are ...", rule, langTool);
+   assertMatchCount(0, "An NOP check ...", rule, langTool);
+   assertMatchCount(0, "A USA-wide license ...", rule, langTool);
+   assertMatchCount(0, "...asked a UN member.", rule, langTool);
+   assertMatchCount(0, "In an un-united Germany...", rule, langTool);
+
+   // fixed false alarms:
+   assertMatchCount(0, "Here, a and b are supplementary angles.", rule, langTool);
+   assertMatchCount(0, "The Qur'an was translated into Polish.", rule, langTool);
+
+   // errors:
+   assertMatchCount(1, "It was a hour ago.", rule, langTool);
+   assertMatchCount(1, "It was an sentence that's long.", rule, langTool);
+   assertMatchCount(1, "It was a uninteresting talk.", rule, langTool);
+   assertMatchCount(1, "An university", rule, langTool);
+   assertMatchCount(1, "A unintersting ...", rule, langTool);
+   assertMatchCount(2, "It was a uninteresting talk with an long sentence.", rule, langTool);
+   assertMatchCount(1, "A hour's work ...", rule, langTool);
+   assertMatchCount(1, "Going to a \"industry party\".", rule, langTool);
+   // with uppercase letters:
+   assertMatchCount(0, "A University", rule, langTool);
+   assertMatchCount(0, "A Europe wide something", rule, langTool);
+   assertMatchCount(1, "then an University sdoj fixme sdoopsd", rule, langTool);
+   assertMatchCount(1, "A 8-year old boy ...", rule, langTool);
+   assertMatchCount(1, "A 18-year old boy ...", rule, langTool);
+   assertMatchCount(1, "...asked an UN member.", rule, langTool);
+   assertMatchCount(1, "In a un-united Germany...", rule, langTool);
+
+   // acronyms/initials:
+   assertMatchCount(0, "A. R.J. Turgot", rule, langTool);
+
+   // mixed case as dictionary-based exception:
+   assertMatchCount(0, "Anyone for an MSc?", rule, langTool);
+   assertMatchCount(1, "Anyone for a MSc?", rule, langTool);
+   // mixed case from general case:
+   assertMatchCount(0, "Anyone for an XMR-based writer?", rule, langTool);
+
+   // apostrophes:
+   assertMatchCount(0, "Its name in English is a[1] (), plural A's, As, as, or a's.", rule, langTool);
+ }
+
+ /**
+  * Analyzes {@code sentence}, applies {@code rule} to it and asserts that
+  * exactly {@code expected} matches are returned.
+  */
+ private void assertMatchCount(int expected, String sentence, AvsAnRule rule,
+     JLanguageTool langTool) throws IOException {
+   final RuleMatch[] found = rule.match(langTool.getAnalyzedSentence(sentence));
+   assertEquals(expected, found.length);
+ }
+
+ /**
+  * Checks the strings returned by {@code AvsAnRule.suggestAorAn} for a few words.
+  */
+ public void testSuggestions() throws IOException {
+   AvsAnRule rule = new AvsAnRule(null);
+   // each row: { word passed to suggestAorAn, expected return value }
+   final String[][] expectations = {
+     {"string", "a string"},
+     {"university", "a university"},
+     {"hour", "an hour"},
+     {"all-terrain", "an all-terrain"},
+     {"UNESCO", "a UNESCO"},
+   };
+   for (int i = 0; i < expectations.length; i++) {
+     assertEquals(expectations[i][1], rule.suggestAorAn(expectations[i][0]));
+   }
+ }
+
+ /**
+  * Checks the from/to positions of the first match that {@link AvsAnRule}
+  * reports for sentences with and without quotes around the article.
+  */
+ public void testPositions() throws IOException {
+   AvsAnRule rule = new AvsAnRule(null);
+   JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+   // no quotes etc.:
+   assertFirstMatchSpan(0, 1, "a industry standard.", rule, langTool);
+
+   // quotes..
+   assertFirstMatchSpan(0, 1, "a \"industry standard\".", rule, langTool);
+   assertFirstMatchSpan(0, 1, "a - industry standard\".", rule, langTool);
+   assertFirstMatchSpan(8, 9, "This is a \"industry standard\".", rule, langTool);
+   assertFirstMatchSpan(1, 2, "\"a industry standard\".", rule, langTool);
+   assertFirstMatchSpan(18, 19, "\"Many say this is a industry standard\".", rule, langTool);
+   assertFirstMatchSpan(11, 13,
+       "Like many \"an desperado\" before him, Bart headed south into Mexico.", rule, langTool);
+ }
+
+ /**
+  * Applies {@code rule} to {@code sentence} and asserts the span of the first match.
+  * Fails with a readable message when the rule reports no match at all, instead
+  * of running into an ArrayIndexOutOfBoundsException on {@code matches[0]}.
+  *
+  * @param expectedFrom expected value of {@code getFromPos()} of the first match
+  * @param expectedTo expected value of {@code getToPos()} of the first match
+  */
+ private void assertFirstMatchSpan(int expectedFrom, int expectedTo, String sentence,
+     AvsAnRule rule, JLanguageTool langTool) throws IOException {
+   final RuleMatch[] matches = rule.match(langTool.getAnalyzedSentence(sentence));
+   assertTrue("Expected at least one match for: " + sentence, matches.length > 0);
+   assertEquals(expectedFrom, matches[0].getFromPos());
+   assertEquals(expectedTo, matches[0].getToPos());
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/CompoundRuleTest.java
new file mode 100644
index 0000000..0505a05
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/CompoundRuleTest.java
@@ -0,0 +1,46 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.CompoundRuleTestAbs;
+
+/**
+ * @author Daniel Naber
+ */
+public class CompoundRuleTest extends CompoundRuleTestAbs {
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ langTool = new JLanguageTool(Language.ENGLISH);
+ rule = new CompoundRule(null);
+ }
+
+ public void testRule() throws IOException {
+ // correct sentences:
+ check(0, "The software supports case-sensitive search.");
+ check(0, "He is one-year-old.");
+ // incorrect sentences:
+ check(1, "case sensitive", new String[]{"case-sensitive"});
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/EnglishUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/EnglishUnpairedBracketsRuleTest.java
new file mode 100644
index 0000000..c245b80
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/EnglishUnpairedBracketsRuleTest.java
@@ -0,0 +1,151 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.IOException;
+import java.util.List;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import junit.framework.TestCase;
+
+public class EnglishUnpairedBracketsRuleTest extends TestCase {
+
+ public void testRule() throws IOException {
+ EnglishUnpairedBracketsRule rule = new EnglishUnpairedBracketsRule(TestTools
+ .getEnglishMessages(), Language.ENGLISH);
+ RuleMatch[] matches;
+ JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+ // correct sentences:
+ matches = rule.match(langTool
+ .getAnalyzedSentence("(This is a test sentence)."));
+ assertEquals(0, matches.length);
+ matches = rule
+ .match(langTool.getAnalyzedSentence("This is a word 'test'."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("This is the joint presidents' declaration."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("The screen is 20\" wide."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("This is a [test] sentence..."));
+ assertEquals(0, matches.length);
+ matches = rule
+ .match(langTool
+ .getAnalyzedSentence("The plight of Tamil refugees caused a surge of support from most of the Tamil political parties.[90]"));
+ assertEquals(0, matches.length);
+ matches = rule
+ .match(langTool
+ .getAnalyzedSentence("This is what he said: \"We believe in freedom. This is what we do.\""));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("(([20] [20] [20]))"));
+ assertEquals(0, matches.length);
+ // test for a case that created a false alarm after disambiguation
+ matches = rule.match(langTool
+ .getAnalyzedSentence("This is a \"special test\", right?"));
+ assertEquals(0, matches.length);
+ // numerical bullets
+ matches = rule.match(langTool
+ .getAnalyzedSentence("We discussed this in Chapter 1)."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("The jury recommended that: (1) Four additional deputies be employed."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("We discussed this in section 1a)."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("We discussed this in section iv)."));
+ assertEquals(0, matches.length);
+
+ //inches exception shouldn't match " here:
+ matches = rule.match(langTool
+ .getAnalyzedSentence("In addition, the government would pay a $1,000 \"cost of education\" grant to the schools."));
+ assertEquals(0, matches.length);
+
+ matches = rule.match(langTool
+ .getAnalyzedSentence("Paradise lost to the alleged water needs of Texas' big cities Thursday."));
+ assertEquals(0, matches.length);
+
+ matches = rule.match(langTool
+ .getAnalyzedSentence("Kill 'em all!"));
+ assertEquals(0, matches.length);
+
+ matches = rule.match(langTool
+ .getAnalyzedSentence("Puttin' on the Ritz"));
+ assertEquals(0, matches.length);
+
+ // incorrect sentences:
+ matches = rule.match(langTool
+ .getAnalyzedSentence("(This is a test sentence."));
+ assertEquals(1, matches.length);
+
+ //tests for Edward's bug
+ matches = rule.match(langTool
+ .getAnalyzedSentence("This is a test with an apostrophe &'."));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("&'"));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("!'"));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("What?'"));
+ assertEquals(1, matches.length);
+ //
+ matches = rule.match(langTool
+ .getAnalyzedSentence("(This is a test” sentence."));
+ assertEquals(2, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("This is a {test sentence."));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool
+ .getAnalyzedSentence("This [is (a test} sentence."));
+ assertEquals(3, matches.length);
+ }
+
+ public void testMultipleSentences() throws IOException {
+ final JLanguageTool tool = new JLanguageTool(Language.ENGLISH);
+ tool.enableRule("EN_UNPAIRED_BRACKETS");
+
+ List<RuleMatch> matches;
+ matches = tool
+ .check("This is multiple sentence text that contains a bracket:"
+ + "[This is bracket. With some text.] and this continues.\n");
+ assertEquals(0, matches.size());
+ matches = tool
+ .check("This is multiple sentence text that contains a bracket:"
+ + "[This is bracket. With some text. And this continues.\n\n");
+ assertEquals(1, matches.size());
+ // now with a paragraph end inside - we get two alarms because of paragraph
+ // resetting
+ matches = tool
+ .check("This is multiple sentence text that contains a bracket. "
+ + "(This is bracket. \n\n With some text.) and this continues.");
+ assertEquals(2, matches.size());
+ }
+
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/ElementTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/ElementTest.java
new file mode 100644
index 0000000..8410cff
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/ElementTest.java
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.patterns;
+
+import junit.framework.TestCase;
+
+ public class ElementTest extends TestCase {
+
+   /**
+    * Checks {@code isSentStart()} for each combination of the two boolean
+    * flags passed to {@code setPosElement} with the {@code SENT_START} tag,
+    * and for an element carrying an unrelated tag.
+    */
+   public void testSentStart() {
+     final Element sentStart = new Element("", false, false, false);
+
+     sentStart.setPosElement("SENT_START", false, false);
+     assertTrue(sentStart.isSentStart());
+
+     sentStart.setPosElement("SENT_START", false, true);
+     assertFalse(sentStart.isSentStart());
+
+     sentStart.setPosElement("SENT_START", true, false);
+     assertTrue(sentStart.isSentStart());
+
+     sentStart.setPosElement("SENT_START", true, true);
+     assertFalse(sentStart.isSentStart());
+
+     // this should be false:
+     final Element unrelated = new Element("bla|blah", false, true, false);
+     unrelated.setPosElement("foo", true, true);
+     assertFalse(unrelated.isSentStart());
+   }
+ }
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleTest.java
new file mode 100644
index 0000000..14d73ac
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleTest.java
@@ -0,0 +1,87 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.util.List;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Daniel Naber
+ */
+public class FalseFriendRuleTest extends TestCase {
+
+ public void testHintsForGermanSpeakers() throws IOException, ParserConfigurationException, SAXException {
+ JLanguageTool langTool = new JLanguageTool(Language.ENGLISH, Language.GERMAN);
+ langTool.activateDefaultFalseFriendRules();
+ assertErrors(1, "We will berate you.", langTool);
+ assertErrors(0, "We will give you advice.", langTool);
+ assertErrors(1, "I go to high school in Foocity.", langTool);
+ }
+
+ public void testHintsForEnglishSpeakers() throws IOException, ParserConfigurationException, SAXException {
+ JLanguageTool langTool = new JLanguageTool(Language.GERMAN, Language.ENGLISH);
+ langTool.activateDefaultFalseFriendRules();
+ assertErrors(1, "Man sollte ihn nicht so beraten.", langTool);
+ assertErrors(0, "Man sollte ihn nicht so beschimpfen.", langTool);
+ assertErrors(1, "Ich gehe in Blubbstadt zur Hochschule.", langTool);
+ }
+
+ public void testHintsForPolishSpeakers() throws IOException, ParserConfigurationException, SAXException {
+ JLanguageTool langTool = new JLanguageTool(Language.ENGLISH, Language.POLISH);
+ langTool.activateDefaultFalseFriendRules();
+ assertErrors(1, "This is an absurd.", langTool);
+ assertErrors(0, "This is absurdity.", langTool);
+ assertSuggestions(0, "This is absurdity.", langTool);
+ assertErrors(1, "I have to speak to my advocate.", langTool);
+ assertSuggestions(3, "My brother is politic.", langTool);
+ }
+
+ private void assertErrors(int errorCount, String s, JLanguageTool langTool) throws IOException {
+ List<RuleMatch> matches = langTool.check(s);
+ //System.err.println(matches);
+ assertEquals(errorCount, matches.size());
+ }
+
+ private void assertSuggestions(final int suggestionCount, final String s, final JLanguageTool langTool) throws IOException {
+ final List<RuleMatch> matches = langTool.check(s);
+ int suggFound = 0;
+ for (final RuleMatch match : matches) {
+ int pos = 0;
+ while (pos != -1) {
+ pos = match.getMessage().indexOf("<suggestion>", pos + 1);
+ suggFound ++;
+ }
+ }
+ if (suggFound > 0) {
+ suggFound--;
+ }
+ assertEquals(suggestionCount, suggFound);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java
new file mode 100644
index 0000000..a1dfeaa
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java
@@ -0,0 +1,502 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Daniel Naber
+ */
+public class PatternRuleTest extends TestCase {
+
+ // English language tool shared by the tests; created in setUp() on first use and then reused.
+ private static JLanguageTool langTool;
+
+ // Heuristic for "this token text looks like a regular expression": a regex
+ // metacharacter ([ ] * + | ^ { } ?) preceded by a non-metacharacter, a backslash
+ // followed by a non-digit, a parenthesized group, or a dot followed by any character.
+ private static final Pattern PROBABLE_REGEX = Pattern.compile("[^\\[\\]\\*\\+\\|\\^\\{\\}\\?][\\[\\]\\*\\+\\|\\^\\{\\}\\?]|\\\\[^0-9]|\\(.+\\)|\\..");
+
+ // A bracketed character class of exactly two characters, e.g. "[Aa]"; each character is captured in its own group.
+ private static final Pattern CASE_REGEX = Pattern.compile("\\[(.)(.)\\]");
+
+
+ /** Creates the shared English language tool unless an earlier test already did. */
+ @Override
+ public void setUp() throws IOException {
+   if (langTool != null) {
+     return;
+   }
+   langTool = new JLanguageTool(Language.ENGLISH);
+ }
+
+ /** Runs the XML rule examples of every language, without console output. */
+ public void testGrammarRulesFromXML() throws IOException {
+   testGrammarRulesFromXML(null, false);
+ }
+
+ /**
+  * Loads {@code grammar.xml} for each language in {@code Language.LANGUAGES}
+  * and checks its rules against their examples.
+  *
+  * @param ignoredLanguages languages to skip, or {@code null} to skip none
+  * @param verbose whether to print progress to stdout
+  */
+ private void testGrammarRulesFromXML(final Set<Language> ignoredLanguages,
+     final boolean verbose) throws IOException {
+   for (final Language lang : Language.LANGUAGES) {
+     final boolean skipped = ignoredLanguages != null && ignoredLanguages.contains(lang);
+     if (verbose) {
+       if (skipped) {
+         System.out.println("Ignoring tests for " + lang.getName());
+       } else {
+         System.out.println("Running tests for " + lang.getName() + "...");
+       }
+     }
+     if (skipped) {
+       continue;
+     }
+     final PatternRuleLoader ruleLoader = new PatternRuleLoader();
+     final JLanguageTool languageTool = new JLanguageTool(lang);
+     final String grammarFile = "/" + lang.getShortName() + "/grammar.xml";
+     final List<PatternRule> rules = ruleLoader.getRules(
+         JLanguageTool.getDataBroker().getFromRulesDirAsStream(grammarFile), grammarFile);
+     warnIfRegexpSyntax(rules, lang);
+     testGrammarRulesFromXML(rules, languageTool, lang);
+   }
+ }
+
+ // TODO: probably this would be more useful for exceptions
+ // instead of adding next methods to PatternRule
+ // we can probably validate using XSD and specify regexes straight there
+ private void warnIfRegexpSyntax(final List<PatternRule> rules,
+ final Language lang) {
+ for (final PatternRule rule : rules) {
+ int i = 0;
+ for (final Element element : rule.getElements()) {
+ i++;
+ warnIfElementNotKosher(element, lang, rule.getId());
+ if (element.getExceptionList() != null) {
+ for (final Element exception: element.getExceptionList()) {
+ warnIfElementNotKosher(exception, lang, rule.getId()
+ + " (exception in token [" + i + "]:" + element +") ");
+ }
+ }
+ }
+ }
+ }
+
+ private void warnIfElementNotKosher(final Element element,
+ final Language lang, final String ruleId) {
+ if (!element.isRegularExpression()
+ && (PROBABLE_REGEX.matcher(element.getString())
+ .find())) {
+ System.err.println("The " + lang.toString() + " rule: "
+ + ruleId + " contains element " + "\"" + element
+ + "\" that is not marked as regular expression"
+ + " but probably is one.");
+ }
+ if (element.isRegularExpression() && "".equals(element.getString())) {
+ System.err.println("The " + lang.toString() + " rule: "
+ + ruleId + " contains an empty string element " + "\"" + element
+ + "\" that is marked as regular expression (don't look at the POS tag, it might be OK).");
+ } else if (element.isRegularExpression()
+ && !PROBABLE_REGEX.matcher(element.getString())
+ .find()) {
+ System.err.println("The " + lang.toString() + " rule: "
+ + ruleId + " contains element " + "\"" + element
+ + "\" that is marked as regular expression"
+ + " but probably is not one.");
+ }
+
+ if (element.isInflected()
+ && "".equals(element.getString())) {
+ System.err.println("The " + lang.toString() + " rule: "
+ + ruleId + " contains element " + "\"" + element
+ + "\" that is marked as inflected"
+ + " but is empty, so the attribute is redundant.");
+ }
+
+ if (element.isRegularExpression() && !element.getCaseSensitive()) {
+ Matcher matcher = CASE_REGEX.matcher(element.getString());
+ if (matcher.find()) {
+ final String letter1 = matcher.group(1);
+ final String letter2 = matcher.group(2);
+
+ if (!letter1.equals(letter2)
+ && letter1.toLowerCase().equals(letter2.toLowerCase())) {
+ System.err.println("The " + lang.toString() + " rule: "
+ + ruleId + " contains regexp part [" + letter1 + letter2
+ + "] which is useless without case_sensitive=\"yes\".");
+ }
+ }
+ }
+
+ if (element.isRegularExpression() && element.getString().contains("|")) {
+ final String[] groups = element.getString().split("\\)");
+ final boolean caseSensitive = element.getCaseSensitive();
+ for (final String group : groups) {
+ final String[] alt = group.split("\\|");
+ final Set<String> partSet = new HashSet<String>();
+ final Set<String> partSetNoCase = new HashSet<String>();
+ for (String part : alt) {
+ String partNoCase = caseSensitive ? part : part.toLowerCase();
+ if (partSetNoCase.contains(partNoCase)) {
+ if (partSet.contains(part)) {
+ // Duplicate disjunction parts "foo|foo".
+ System.err.println("The " + lang.toString() + " rule : "
+ + ruleId + " contains duplicated disjunction part ("
+ + part + ") within the element " + "\"" + element + "\".");
+ } else {
+ // Duplicate disjunction parts "Foo|foo" since element ignores case.
+ System.err.println("The " + lang.toString() + " rule : "
+ + ruleId + " contains duplicated non case sensitive disjunction part ("
+ + part + ") within the element " + "\"" + element + "\". Did you "
+ + "forget case_sensitive=\"yes\"?");
+ }
+ }
+ partSetNoCase.add(partNoCase);
+ partSet.add(part);
+ }
+ }
+ }
+ }
+
+
+ /**
+  * Checks every rule in {@code rules} against the examples it carries.
+  * <p>
+  * Correct examples must not trigger the rule. Incorrect examples must contain
+  * {@code <marker>...</marker>} markup, must trigger exactly one match at the
+  * marked position, must produce the listed corrections (if any), and applying
+  * each suggested replacement must make the match disappear.
+  * <p>
+  * A rule for which {@code isWithComplexPhrase()} is true may miss an incorrect
+  * example. Such an example is remembered under the key rule id + sentence; if
+  * no rule with that id ever matches the sentence, the rule is re-run as an
+  * ordinary rule so that the failure is reported.
+  *
+  * @param rules the rules to check
+  * @param languageTool used to analyze the example sentences
+  * @param lang language of the rules, used in failure messages
+  * @throws IOException if analyzing a sentence fails
+  */
+ private void testGrammarRulesFromXML(final List<PatternRule> rules,
+     final JLanguageTool languageTool, final Language lang) throws IOException {
+   // value is the rule while the example is still unmatched, null once it matched
+   final HashMap<String, PatternRule> complexRules = new HashMap<String, PatternRule>();
+   for (final PatternRule rule : rules) {
+     final List<String> goodSentences = rule.getCorrectExamples();
+     for (String goodSentence : goodSentences) {
+       // enable indentation use
+       goodSentence = goodSentence.replaceAll("[\\n\\t]+", "");
+       goodSentence = cleanXML(goodSentence);
+       assertTrue(goodSentence.trim().length() > 0);
+       assertFalse(lang + ": Did not expect error in: " + goodSentence
+           + " (Rule: " + rule + ")", match(rule, goodSentence, languageTool));
+     }
+     final List<IncorrectExample> badSentences = rule.getIncorrectExamples();
+     for (final IncorrectExample origBadExample : badSentences) {
+       // enable indentation use
+       final String origBadSentence = origBadExample.getExample().replaceAll(
+           "[\\n\\t]+", "");
+       final List<String> suggestedCorrection = origBadExample
+           .getCorrections();
+       final int expectedMatchStart = origBadSentence.indexOf("<marker>");
+       // Test the raw index: after subtracting the tag length a missing
+       // closing tag would no longer show up as -1.
+       final int closingMarkerPos = origBadSentence.indexOf("</marker>");
+       if (expectedMatchStart == -1 || closingMarkerPos == -1) {
+         fail(lang
+             + ": No error position markup ('<marker>...</marker>') in bad example in rule "
+             + rule);
+       }
+       // positions refer to the sentence with the markup removed
+       final int expectedMatchEnd = closingMarkerPos - "<marker>".length();
+       final String badSentence = cleanXML(origBadSentence);
+       assertTrue(badSentence.trim().length() > 0);
+       RuleMatch[] matches = getMatches(rule, badSentence, languageTool);
+       if (!rule.isWithComplexPhrase()) {
+         assertTrue(lang + ": Did expect one error in: \"" + badSentence
+             + "\" (Rule: " + rule + "), got " + matches.length
+             + ". Additional info:" + rule.getMessage(), matches.length == 1);
+         assertEquals(lang
+             + ": Incorrect match position markup (start) for rule " + rule,
+             expectedMatchStart, matches[0].getFromPos());
+         assertEquals(lang
+             + ": Incorrect match position markup (end) for rule " + rule,
+             expectedMatchEnd, matches[0].getToPos());
+         // make sure suggestion is what we expect it to be
+         if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+           assertTrue("You specified a correction but your message has no suggestions in rule " + rule,
+               rule.getMessage().contains("<suggestion>")
+           );
+           assertTrue(lang + ": Incorrect suggestions: "
+               + suggestedCorrection.toString() + " != "
+               + matches[0].getSuggestedReplacements() + " for rule " + rule,
+               suggestedCorrection.equals(matches[0]
+                   .getSuggestedReplacements()));
+         }
+         // make sure the suggested correction doesn't produce an error:
+         if (matches[0].getSuggestedReplacements().size() > 0) {
+           final int fromPos = matches[0].getFromPos();
+           final int toPos = matches[0].getToPos();
+           for (final String repl : matches[0].getSuggestedReplacements()) {
+             final String fixedSentence = badSentence.substring(0, fromPos)
+                 + repl + badSentence.substring(toPos);
+             matches = getMatches(rule, fixedSentence, languageTool);
+             if (matches.length > 0) {
+               fail("Incorrect input:\n"
+                   + " " + badSentence
+                   + "\nCorrected sentence:\n"
+                   + " " + fixedSentence
+                   + "\nBy Rule:\n"
+                   + " " + rule
+                   + "\nThe correction triggered an error itself:\n"
+                   + " " + matches[0] + "\n");
+             }
+           }
+         }
+       } else { // for multiple rules created with complex phrases
+
+         // NOTE(review): same arguments as the call above, so this looks
+         // redundant - confirm that match() is stateless before removing it.
+         matches = getMatches(rule, badSentence, languageTool);
+         final String exampleKey = rule.getId() + badSentence;
+         if (matches.length == 0 && !complexRules.containsKey(exampleKey)) {
+           complexRules.put(exampleKey, rule);
+         }
+
+         if (matches.length != 0) {
+           complexRules.put(exampleKey, null);
+           assertTrue(lang + ": Did expect one error in: \"" + badSentence
+               + "\" (Rule: " + rule + "), got " + matches.length,
+               matches.length == 1);
+           assertEquals(lang
+               + ": Incorrect match position markup (start) for rule " + rule,
+               expectedMatchStart, matches[0].getFromPos());
+           assertEquals(lang
+               + ": Incorrect match position markup (end) for rule " + rule,
+               expectedMatchEnd, matches[0].getToPos());
+           // make sure suggestion is what we expect it to be
+           if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+             assertTrue(
+                 lang + ": Incorrect suggestions: "
+                     + suggestedCorrection.toString() + " != "
+                     + matches[0].getSuggestedReplacements() + " for rule "
+                     + rule, suggestedCorrection.equals(matches[0]
+                     .getSuggestedReplacements()));
+           }
+           // make sure the suggested correction doesn't produce an error:
+           if (matches[0].getSuggestedReplacements().size() > 0) {
+             final int fromPos = matches[0].getFromPos();
+             final int toPos = matches[0].getToPos();
+             for (final String repl : matches[0].getSuggestedReplacements()) {
+               final String fixedSentence = badSentence.substring(0, fromPos)
+                   + repl + badSentence.substring(toPos);
+               matches = getMatches(rule, fixedSentence, languageTool);
+               assertEquals("Corrected sentence for rule " + rule
+                   + " triggered error: " + fixedSentence, 0, matches.length);
+             }
+           }
+         }
+       }
+
+     }
+   }
+   if (!complexRules.isEmpty()) {
+     final List<PatternRule> badRules = new ArrayList<PatternRule>();
+     for (final PatternRule badRule : complexRules.values()) {
+       if (badRule != null) {
+         badRule.notComplexPhrase();
+         badRule
+             .setMessage("The rule contains a phrase that never matched any incorrect example.");
+         badRules.add(badRule);
+       }
+     }
+     if (!badRules.isEmpty()) {
+       // re-run as ordinary rules so the unmatched examples fail with a message
+       testGrammarRulesFromXML(badRules, languageTool, lang);
+     }
+   }
+ }
+
+ protected String cleanXML(final String str) {
+ return str.replaceAll("<([^<].*?)>", "");
+ }
+
+ /** Returns whether {@code rule} reports at least one match for {@code sentence}. */
+ private boolean match(final Rule rule, final String sentence,
+     final JLanguageTool languageTool) throws IOException {
+   return getMatches(rule, sentence, languageTool).length > 0;
+ }
+
+ /** Analyzes {@code sentence} with {@code languageTool} and returns what {@code rule} reports for it. */
+ private RuleMatch[] getMatches(final Rule rule, final String sentence,
+     final JLanguageTool languageTool) throws IOException {
+   final AnalyzedSentence analyzed = languageTool.getAnalyzedSentence(sentence);
+   return rule.match(analyzed);
+ }
+
+ /**
+  * Checks that suggestions for a word at the start of a sentence are
+  * returned with an uppercase first letter.
+  */
+ public void testUppercasingSuggestion() throws IOException {
+   final JLanguageTool englishTool = new JLanguageTool(Language.ENGLISH);
+   englishTool.activateDefaultPatternRules();
+
+   final List<RuleMatch> found = englishTool.check("Were are in the process of ...");
+   assertEquals(1, found.size());
+
+   final List<String> replacements = found.get(0).getSuggestedReplacements();
+   assertEquals(2, replacements.size());
+   assertEquals("Where", replacements.get(0));
+   assertEquals("We", replacements.get(1));
+ }
+
+ /**
+  * Exercises hand-built pattern rules: a single token, a token sequence,
+  * and regular-expression tokens with alternatives.
+  */
+ public void testRule() throws IOException {
+   PatternRule pr = makePatternRule("one");
+   assertEquals(0, matchCount(pr, "A non-matching sentence."));
+
+   final RuleMatch[] single = pr.match(langTool
+       .getAnalyzedSentence("A matching sentence with one match."));
+   assertEquals(1, single.length);
+   final RuleMatch first = single[0];
+   assertEquals(25, first.getFromPos());
+   assertEquals(28, first.getToPos());
+   // these two are not set if the rule is called standalone (not via
+   // JLanguageTool):
+   assertEquals(-1, first.getColumn());
+   assertEquals(-1, first.getLine());
+   assertEquals("ID1", first.getRule().getId());
+   assertTrue(first.getMessage().equals("user visible message"));
+   assertTrue(first.getShortMessage().equals("short comment"));
+   assertEquals(3, matchCount(pr, "one one and one: three matches"));
+
+   pr = makePatternRule("one two");
+   assertEquals(0, matchCount(pr, "this is one not two"));
+   assertEquals(0, matchCount(pr, "this is two one"));
+   assertEquals(1, matchCount(pr, "this is one two three"));
+   assertEquals(1, matchCount(pr, "one two"));
+
+   pr = makePatternRule("one|foo|xxxx two", false, true);
+   assertEquals(0, matchCount(pr, "one foo three"));
+   assertEquals(1, matchCount(pr, "one two"));
+   assertEquals(1, matchCount(pr, "foo two"));
+   assertEquals(1, matchCount(pr, "one foo two"));
+   assertEquals(1, matchCount(pr, "y x z one two blah foo"));
+
+   pr = makePatternRule("one|foo|xxxx two|yyy", false, true);
+   assertEquals(0, matchCount(pr, "one, yyy"));
+   assertEquals(1, matchCount(pr, "one yyy"));
+   assertEquals(1, matchCount(pr, "xxxx two"));
+   assertEquals(1, matchCount(pr, "xxxx yyy"));
+ }
+
+ /** Returns how many matches {@code pr} reports for {@code sentence}, analyzed with the shared English tool. */
+ private int matchCount(final PatternRule pr, final String sentence) throws IOException {
+   return pr.match(langTool.getAnalyzedSentence(sentence)).length;
+ }
+
+ /** Builds a rule from space-separated tokens that is neither case-sensitive nor regex-based. */
+ private PatternRule makePatternRule(final String s) {
+   return makePatternRule(s, false, false);
+ }
+
+ /**
+  * Builds a rule with the id {@code ID1} from the space-separated tokens in
+  * {@code s}. The token {@code SENT_START} becomes an element with empty
+  * text and that POS tag; every other token becomes a plain text element.
+  *
+  * @param s space-separated tokens
+  * @param caseSensitive passed to every element
+  * @param regex passed to every element
+  */
+ private PatternRule makePatternRule(final String s,
+     final boolean caseSensitive, final boolean regex) {
+   final List<Element> elems = new ArrayList<Element>();
+   for (final String token : s.split(" ")) {
+     final Element elem;
+     if (token.equals("SENT_START")) {
+       elem = new Element("", caseSensitive, regex, false);
+       elem.setPosElement(token, false, false);
+     } else {
+       elem = new Element(token, caseSensitive, regex, false);
+     }
+     elems.add(elem);
+   }
+   return new PatternRule("ID1", Language.ENGLISH, elems,
+       "test rule", "user visible message", "short comment");
+ }
+
+ /** A rule anchored with {@code SENT_START} must match only at the beginning of a sentence. */
+ public void testSentenceStart() throws IOException {
+   final PatternRule anchored = makePatternRule("SENT_START One");
+
+   final RuleMatch[] midSentence = anchored.match(langTool.getAnalyzedSentence("Not One word."));
+   assertEquals(0, midSentence.length);
+
+   final RuleMatch[] atStart = anchored.match(langTool.getAnalyzedSentence("One word."));
+   assertEquals(1, atStart.length);
+ }
+
+ private static String callFormatMultipleSynthesis(final String[] suggs,
+ final String left, final String right) throws IllegalArgumentException,
+ SecurityException, InvocationTargetException, IllegalAccessException,
+ NoSuchMethodException {
+ Class[] argClasses = { String[].class, String.class, String.class };
+ Object[] argObjects = { suggs, left, right };
+ return TestTools.callStringStaticMethod(PatternRule.class,
+ "formatMultipleSynthesis", argClasses, argObjects);
+ }
+
+ /* test private methods as well */
+ /**
+  * Checks that each suggestion is wrapped with the given left and right
+  * text and that the wrapped suggestions are joined with ", ".
+  */
+ public void testformatMultipleSynthesis() throws IllegalArgumentException,
+     SecurityException, InvocationTargetException, IllegalAccessException,
+     NoSuchMethodException {
+   final String left = "This is how you should write: <suggestion>";
+   final String right = "</suggestion>.";
+
+   final String twoPhrases = callFormatMultipleSynthesis(
+       new String[] { "blah blah", "foo bar" }, left, right);
+   assertEquals(
+       "This is how you should write: <suggestion>blah blah</suggestion>, <suggestion>foo bar</suggestion>.",
+       twoPhrases);
+
+   final String wordAndSpace = callFormatMultipleSynthesis(
+       new String[] { "test", " " }, left, right);
+   assertEquals(
+       "This is how you should write: <suggestion>test</suggestion>, <suggestion> </suggestion>.",
+       wordAndSpace);
+ }
+
+ /**
+  * Command-line entry point that runs the XML example checks for all
+  * languages with progress output, as a help for rule developers who are
+  * not programmers.
+  */
+ public static void main(final String[] args) throws IOException {
+   final PatternRuleTest test = new PatternRuleTest();
+   System.out.println("Running XML pattern tests...");
+   test.setUp();
+   // ignoredLanguages.add(Language.CZECH); // has no XML rules yet
+   final Set<Language> ignoredLanguages = new HashSet<Language>();
+   test.testGrammarRulesFromXML(ignoredLanguages, true);
+   System.out.println("Tests successful.");
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/TestUnifier.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/TestUnifier.java
new file mode 100644
index 0000000..ec05e25
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/TestUnifier.java
@@ -0,0 +1,283 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.List;
+import java.util.ArrayList;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+
+public class TestUnifier extends TestCase {
+
+ // trivial unification = test if the character case is the same
+ public void testUnificationCase() {
+ Unifier uni = new Unifier();
+ Element elLower = new Element("\\p{Ll}+", true, true, false);
+ Element elUpper = new Element("\\p{Lu}\\p{Ll}+", true, true, false);
+ Element elAllUpper = new Element("\\p{Lu}+$", true, true, false);
+ uni.setEquivalence("case-sensitivity", "lowercase", elLower);
+ uni.setEquivalence("case-sensitivity", "uppercase", elUpper);
+ uni.setEquivalence("case-sensitivity", "alluppercase", elAllUpper);
+ AnalyzedToken lower1 = new AnalyzedToken("lower", "JJR", "lower");
+ AnalyzedToken lower2 = new AnalyzedToken("lowercase", "JJ", "lowercase");
+ AnalyzedToken upper1 = new AnalyzedToken("Uppercase", "JJ", "Uppercase");
+ AnalyzedToken upper2 = new AnalyzedToken("John", "NNP", "John");
+ AnalyzedToken upperall1 = new AnalyzedToken("JOHN", "NNP", "John");
+ AnalyzedToken upperall2 = new AnalyzedToken("JAMES", "NNP", "James");
+
+ Map<String, List<String>> equiv = new HashMap<String, List<String>>();
+ List<String> list1 = new ArrayList<String>();
+ list1.add("lowercase");
+ equiv.put("case-sensitivity", list1);
+ boolean satisfied = uni.isSatisfied(lower1, equiv);
+ satisfied &= uni.isSatisfied(lower2, equiv);
+ uni.startUnify();
+ assertEquals(true, satisfied);
+ uni.reset();
+ satisfied = uni.isSatisfied(upper2, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(lower2, equiv);
+ assertEquals(false, satisfied);
+ uni.reset();
+ satisfied = uni.isSatisfied(upper1, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(lower1, equiv);
+ assertEquals(false, satisfied);
+ uni.reset();
+ satisfied = uni.isSatisfied(upper2, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(upper1, equiv);
+ assertEquals(false, satisfied);
+ uni.reset();
+ equiv.clear();
+ list1.clear();
+ list1.add("uppercase");
+ equiv.put("case-sensitivity", list1);
+ satisfied = uni.isSatisfied(upper2, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(upper1, equiv);
+ assertEquals(true, satisfied);
+ uni.reset();
+ equiv.clear();
+ list1.clear();
+ list1.add("alluppercase");
+ equiv.put("case-sensitivity", list1);
+ satisfied = uni.isSatisfied(upper2, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(upper1, equiv);
+ assertEquals(false, satisfied);
+ uni.reset();
+ satisfied = uni.isSatisfied(upperall2, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(upperall1, equiv);
+ assertEquals(true, satisfied);
+ }
+
+ // slightly non-trivial unification =
+ // test if the grammatical number is the same
+ public void testUnificationNumber() {
+ Unifier uni = new Unifier();
+ Element sgElement = new Element("", false, false, false);
+ sgElement.setPosElement(".*[\\.:]sg:.*", true, false);
+ uni.setEquivalence("number", "singular", sgElement);
+ Element plElement = new Element("", false, false, false);
+ plElement.setPosElement(".*[\\.:]pl:.*", true, false);
+ uni.setEquivalence("number", "plural", plElement);
+
+ AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah", "mały");
+ AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah", "człowiek");
+
+ Map<String, List<String>> equiv = new HashMap<String, List<String>>();
+ List<String> list1 = new ArrayList<String>();
+ list1.add("singular");
+ equiv.put("number", list1);
+
+ boolean satisfied = uni.isSatisfied(sing1, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(sing2, equiv);
+ assertEquals(true, satisfied);
+ uni.reset();
+
+ //for multiple readings - OR for interpretations, AND for tokens
+ AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
+ satisfied = uni.isSatisfied(sing1, equiv);
+ satisfied |= uni.isSatisfied(sing1a, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(sing2, equiv);
+ assertEquals(true, satisfied);
+ uni.reset();
+
+ //check if any of the equivalences is there
+ list1.add("plural");
+ equiv.clear();
+ equiv.put("number", list1);
+ sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
+ satisfied = uni.isSatisfied(sing1, equiv);
+ satisfied |= uni.isSatisfied(sing1a, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(sing2, equiv);
+ assertEquals(true, satisfied);
+ uni.reset();
+
+//now test all possible feature equivalences by leaving type blank
+ sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
+ equiv.clear();
+ equiv.put("number", null);
+ satisfied = uni.isSatisfied(sing1, equiv);
+ satisfied |= uni.isSatisfied(sing1a, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(sing2, equiv);
+ assertEquals(true, satisfied);
+ uni.reset();
+
+//test non-agreeing tokens with blank types
+ satisfied = uni.isSatisfied(sing1a, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(sing2, equiv);
+ assertEquals(false, satisfied);
+ uni.reset();
+ }
+
+// slightly non-trivial unification =
+ // test if the grammatical number and gender are the same
+ public void testUnificationNumberGender() {
+ Unifier uni = new Unifier();
+ Element sgElement = new Element("", false, false, false);
+ sgElement.setPosElement(".*[\\.:]sg:.*", true, false);
+ uni.setEquivalence("number", "singular", sgElement);
+ Element plElement = new Element("", false, false, false);
+ plElement.setPosElement(".*[\\.:]pl:.*", true, false);
+ uni.setEquivalence("number", "plural", plElement);
+
+ Element femElement = new Element("", false, false, false);
+ femElement.setPosElement(".*[\\.:]f", true, false);
+ uni.setEquivalence("gender", "feminine", femElement);
+
+ Element mascElement = new Element("", false, false, false);
+ mascElement.setPosElement(".*[\\.:]m", true, false);
+ uni.setEquivalence("gender", "masculine", mascElement);
+
+ AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
+ AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:sg:blahblah:f", "mały");
+ AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:m", "mały");
+ AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");
+
+ Map<String, List<String>> equiv = new HashMap<String, List<String>>();
+ equiv.put("number", null);
+ equiv.put("gender", null);
+
+ boolean satisfied = uni.isSatisfied(sing1, equiv);
+ satisfied |= uni.isSatisfied(sing1a, equiv);
+ satisfied |= uni.isSatisfied(sing1b, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(sing2, equiv);
+ uni.startNextToken();
+ assertEquals(true, satisfied);
+ assertEquals("[mały/adj:sg:blahblah:m, człowiek/subst:sg:blahblah:m]", Arrays.toString(uni.getUnifiedTokens()));
+ uni.reset();
+ }
+
+ // checks if all tokens share the same set of
+ // features to be unified
+ public void testMultiplefeats() {
+ Unifier uni = new Unifier();
+ Element sgElement = new Element("", false, false, false);
+ sgElement.setPosElement(".*[\\.:]sg:.*", true, false);
+ uni.setEquivalence("number", "singular", sgElement);
+ Element plElement = new Element("", false, false, false);
+ plElement.setPosElement(".*[\\.:]pl:.*", true, false);
+ uni.setEquivalence("number", "plural", plElement);
+ Element femElement = new Element("", false, false, false);
+ femElement.setPosElement(".*[\\.:]f([\\.:].*)?", true, false);
+ uni.setEquivalence("gender", "feminine", femElement);
+ Element mascElement = new Element("", false, false, false);
+ mascElement.setPosElement(".*[\\.:]m([\\.:].*)?", true, false);
+ uni.setEquivalence("gender", "masculine", mascElement);
+ Element neutElement = new Element("", false, false, false);
+ neutElement.setPosElement(".*[\\.:]n([\\.:].*)?", true, false);
+ uni.setEquivalence("gender", "neutral", neutElement);
+
+ AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
+ AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
+ AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
+ AnalyzedToken sing2 = new AnalyzedToken("zgarbiony", "adj:pl:blahblah:f", "zgarbiony");
+ AnalyzedToken sing3 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");
+
+ Map<String, List<String>> equiv = new HashMap<String, List<String>>();
+ equiv.put("number", null);
+ equiv.put("gender", null);
+
+ boolean satisfied = uni.isSatisfied(sing1, equiv);
+ satisfied |= uni.isSatisfied(sing1a, equiv);
+ satisfied |= uni.isSatisfied(sing1b, equiv);
+ uni.startUnify();
+ satisfied &= uni.isSatisfied(sing2, equiv);
+ uni.startNextToken();
+ satisfied &= uni.isSatisfied(sing3, equiv);
+ uni.startNextToken();
+ assertEquals(false, satisfied);
+ uni.reset();
+
+ //now test the simplified interface
+ satisfied = true; //this must be true to start with...
+ satisfied &= uni.isUnified(sing1, equiv, false, false);
+ satisfied &= uni.isUnified(sing1a, equiv, false, false);
+ satisfied &= uni.isUnified(sing1b, equiv, false, true);
+ satisfied &= uni.isUnified(sing2, equiv, false, true);
+ satisfied &= uni.isUnified(sing3, equiv, false, true);
+ assertEquals(false, satisfied);
+ uni.reset();
+
+ sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
+ sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
+ sing2 = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
+
+ satisfied = true;
+ satisfied &= uni.isUnified(sing1a, equiv, false, false);
+ satisfied &= uni.isUnified(sing1b, equiv, false, true);
+ satisfied &= uni.isUnified(sing2, equiv, false, true);
+ assertEquals(true, satisfied);
+ assertEquals("[osobisty/adj:sg:nom.acc.voc:n:pos:aff, godło/subst:sg:nom.acc.voc:n]", Arrays.toString(uni.getFinalUnified()));
+ uni.reset();
+
+ //now test a case when the last reading doesn't match at all
+
+ sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
+ sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
+ AnalyzedToken sing2a = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
+ AnalyzedToken sing2b = new AnalyzedToken("godło", "indecl", "godło");
+
+ satisfied = true;
+ satisfied &= uni.isUnified(sing1a, equiv, false, false);
+ satisfied &= uni.isUnified(sing1b, equiv, false, true);
+ satisfied &= uni.isUnified(sing2a, equiv, false, false);
+ satisfied &= uni.isUnified(sing2b, equiv, false, true);
+ assertEquals(true, satisfied);
+ assertEquals("[osobisty/adj:sg:nom.acc.voc:n:pos:aff, godło/subst:sg:nom.acc.voc:n]", Arrays.toString(uni.getFinalUnified()));
+ uni.reset();
+
+ }
+
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/CompoundRuleTest.java
new file mode 100644
index 0000000..93cc3ea
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/CompoundRuleTest.java
@@ -0,0 +1,46 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.CompoundRuleTestAbs;
+
+/**
+ * @author Daniel Naber
+ */
+public class CompoundRuleTest extends CompoundRuleTestAbs {
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ langTool = new JLanguageTool(Language.POLISH);
+ rule = new CompoundRule(null);
+ }
+
+ public void testRule() throws IOException {
+ // correct sentences:
+ check(0, "Nie róbmy nic na łapu-capu.");
+ check(0, "Jedzmy kogel-mogel.");
+ // incorrect sentences:
+ check(1, "bim bom", new String[]{"bim-bom"});
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRuleTest.java
new file mode 100644
index 0000000..52bcd6f
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRuleTest.java
@@ -0,0 +1,56 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import junit.framework.TestCase;
+
+public class PolishUnpairedBracketsRuleTest extends TestCase {
+
+ public void testRulePolish() throws IOException {
+ PolishUnpairedBracketsRule rule = new PolishUnpairedBracketsRule(TestTools
+ .getEnglishMessages(), Language.POLISH);
+ RuleMatch[] matches;
+ JLanguageTool langTool = new JLanguageTool(Language.POLISH);
+ // correct sentences:
+ matches = rule.match(langTool
+ .getAnalyzedSentence("(To jest zdanie do testowania)."));
+ assertEquals(0, matches.length);
+ // correct sentences:
+ matches = rule
+ .match(langTool
+ .getAnalyzedSentence("Piosenka ta trafiła na wiele list \"Best of...\", włączając w to te, które zostały utworzone przez magazyn Rolling Stone."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("A \"B\" C."));
+ assertEquals(0, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("\"A\" B \"C\"."));
+ assertEquals(0, matches.length);
+ // incorrect sentences:
+ matches = rule.match(langTool
+ .getAnalyzedSentence("W tym zdaniu jest niesparowany „cudzysłów."));
+ assertEquals(1, matches.length);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRuleTest.java
new file mode 100644
index 0000000..070f3f2
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRuleTest.java
@@ -0,0 +1,51 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+public class PolishWordRepeatRuleTest extends TestCase {
+
+ /*
+ * Test method for 'de.danielnaber.languagetool.rules.pl.PolishWordRepeatRule.match(AnalyzedSentence)'
+ */
+ public void testRule() throws IOException {
+ final PolishWordRepeatRule rule = new PolishWordRepeatRule(null);
+ RuleMatch[] matches;
+ JLanguageTool langTool = new JLanguageTool(Language.POLISH);
+ //correct
+ matches = rule.match(langTool.getAnalyzedSentence("To jest zdanie próbne."));
+ assertEquals(0, matches.length);
+ //repeated prepositions, don't count'em
+ matches = rule.match(langTool.getAnalyzedSentence("Na dyskotece tańczył jeszcze, choć był na bani."));
+ assertEquals(0, matches.length);
+ //incorrect
+ matches = rule.match(langTool.getAnalyzedSentence("Był on bowiem pięknym strzelcem bowiem."));
+ assertEquals(1, matches.length);
+ matches = rule.match(langTool.getAnalyzedSentence("Mówiła długo, żeby tylko mówić długo."));
+ assertEquals(2, matches.length);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/SimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/SimpleReplaceRuleTest.java
new file mode 100644
index 0000000..5689a72
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/SimpleReplaceRuleTest.java
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ *
+ * Simple tests for rules/pl/SimpleReplaceRule class
+ *
+ * @author Ionuț Păduraru
+ */
+public class SimpleReplaceRuleTest extends TestCase {
+
+ private SimpleReplaceRule rule;
+ private JLanguageTool langTool;
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ rule = new SimpleReplaceRule(TestTools.getMessages("pl"));
+ langTool = new JLanguageTool(Language.POLISH);
+ }
+
+ public void testRule() throws IOException {
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Wszystko w porządku.")).length);
+
+ // incorrect sentences:
+
+ // at the beginning of a sentence (the suggestion must keep the capital letter)
+ checkSimpleReplaceRule("Piaty przypadek.", "Piąty");
+ // inside sentence
+ checkSimpleReplaceRule("To piaty przypadek.", "piąty");
+ }
+
+ /**
+ * Check if a specific replace rule applies.
+ *
+ * @param sentence
+ * the sentence containing the incorrect/misspelled word.
+ * @param word
+ * the word that is correct (the suggested replacement).
+ * @throws IOException
+ */
+ private void checkSimpleReplaceRule(String sentence, String word)
+ throws IOException {
+ RuleMatch[] matches;
+ matches = rule.match(langTool.getAnalyzedSentence(sentence));
+ assertEquals("Invalid matches.length while checking sentence: "
+ + sentence, 1, matches.length);
+ assertEquals("Invalid replacement count wile checking sentence: "
+ + sentence, 1, matches[0].getSuggestedReplacements().size());
+ assertEquals("Invalid suggested replacement while checking sentence: "
+ + sentence, word, matches[0].getSuggestedReplacements().get(0));
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/CompoundRuleTest.java
new file mode 100644
index 0000000..f6f6200
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/CompoundRuleTest.java
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ro;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.CompoundRuleTestAbs;
+
+/**
+ * Tests for {@link CompoundRule} class.
+ *
+ * @author Ionuț Păduraru
+ */
+public class CompoundRuleTest extends CompoundRuleTestAbs {
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ langTool = new JLanguageTool(Language.ROMANIAN);
+ rule = new CompoundRule(TestTools.getMessages("ro"));
+ }
+
+ public void testRule() throws IOException {
+ // correct sentences:
+ check(0, "Au plecat câteșitrei.");
+ // incorrect sentences:
+ check(1, "câte și trei", new String[] { "câteșitrei" });
+ check(1, "Câte și trei", new String[] { "Câteșitrei" });
+ check(1, "câte-și-trei", new String[] { "câteșitrei" });
+
+ check(1, "tus trei", new String[] { "tustrei" });
+ check(1, "tus-trei", new String[] { "tustrei" });
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/RomanianPatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/RomanianPatternRuleTest.java
new file mode 100644
index 0000000..eab518f
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/RomanianPatternRuleTest.java
@@ -0,0 +1,55 @@
+package de.danielnaber.languagetool.rules.ro;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.rules.patterns.PatternRuleTest;
+
+/**
+ *
+ * @author Ionuț Păduraru
+ * @since 07.05.2009 21:07:12
+ *
+ * This test case is not for actual rules but for PatternRuleTest to ensure proper
+ * XML cleaning: there is a Romanian rule where "<<" is used; we want
+ * "X<marker><<</marker>Y" to be transformed into "X<<Y", not into "XY"
+ * (see rule id GHILIMELE_DUBLE_INTERIOR_INCEPUT in ro/grammar.xml).
+ *
+ */
+public class RomanianPatternRuleTest extends TestCase {
+
+ private PatternRuleTestWrapper patternRuleTestWrapper = new PatternRuleTestWrapper();
+
+ /**
+ * Wrapper around PatternRuleTest that exposes its cleanXML method
+ *
+ * @author Ionuț Păduraru
+ * @since 07.05.2009 21:11:01
+ */
+ private static class PatternRuleTestWrapper extends PatternRuleTest {
+ @Override
+ public String cleanXML(String str) {
+ return super.cleanXML(str);
+ }
+ }
+
+ public String cleanXML(String str) {
+ return patternRuleTestWrapper.cleanXML(str);
+ }
+
+ /**
+ * Ensure proper XML cleaning in PatternRuleTest: tags are stripped, but
+ * angle brackets that are not part of a tag must survive in the output.
+ * @author Ionuț Păduraru
+ * @since 07.05.2009 21:11:30
+ * @throws Exception declared by the test signature; nothing here is expected to throw
+ */
+ public void testCleanXML() throws Exception {
+ assertEquals("12", cleanXML("1<mark>2"));
+ assertEquals("12", cleanXML("1</mark>2"));
+ assertEquals("1<2", cleanXML("1<</mark>2"));
+ assertEquals("<2", cleanXML("<</mark>2"));
+ assertEquals(">2", cleanXML("></mark>2"));
+ assertEquals("1abc2", cleanXML("1<mark>abc</mark>2"));
+ assertEquals("1<<2", cleanXML("1<mark><<</mark>2"));
+ assertEquals("1>>2", cleanXML("1<mark>>></mark>2"));
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/SimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/SimpleReplaceRuleTest.java
new file mode 100644
index 0000000..76d7549
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/SimpleReplaceRuleTest.java
@@ -0,0 +1,153 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.ro;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ *
+ * Simple tests for rules/ro/SimpleReplaceRule class
+ *
+ * @author Ionuț Păduraru
+ */
+public class SimpleReplaceRuleTest extends TestCase {
+
+ private SimpleReplaceRule rule;
+ private JLanguageTool langTool;
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ rule = new SimpleReplaceRule(TestTools.getMessages("ro"));
+ langTool = new JLanguageTool(Language.ROMANIAN);
+ }
+
+ /**
+ * Make sure that every wrong word has a suggestion that differs from the wrong word itself.
+ */
+ public void testInvalidSuggestion() {
+ // Collect the wrong word (map key): the suggestion may be null and would identify nothing.
+ List<String> invalidSuggestions = new ArrayList<String>();
+ for (Map<String, String> ruleEntry : rule.getWrongWords()) {
+ for (String fromWord : ruleEntry.keySet()) {
+ String toWord = ruleEntry.get(fromWord);
+ if (toWord == null || fromWord.equals(toWord)) {
+ invalidSuggestions.add(fromWord);
+ }
+ }
+ }
+ if (!invalidSuggestions.isEmpty()) {
+ fail("Invalid suggestions found for: " + Arrays.toString(invalidSuggestions.toArray(new String[]{})));
+ }
+ }
+ public void testRule() throws IOException {
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Paisprezece case.")).length);
+
+ // incorrect sentences:
+
+ // at the beginning of a sentence (Romanian replace rule is case-sensitive)
+ checkSimpleReplaceRule("Patrusprezece case.", "Paisprezece");
+ // inside sentence
+ checkSimpleReplaceRule("Satul are patrusprezece case.", "paisprezece");
+ checkSimpleReplaceRule("Satul are (patrusprezece) case.", "paisprezece");
+ checkSimpleReplaceRule("Satul are «patrusprezece» case.", "paisprezece");
+
+ checkSimpleReplaceRule("El are șasesprezece ani.", "șaisprezece");
+ checkSimpleReplaceRule("El a luptat pentru întâiele cărți.", "întâile");
+ checkSimpleReplaceRule("El are cinsprezece cărți.", "cincisprezece");
+ checkSimpleReplaceRule("El a fost patruzecioptist.", "pașoptist");
+ checkSimpleReplaceRule("M-am adresat întâiei venite.", "întâii");
+ checkSimpleReplaceRule("M-am adresat întâielor venite.", "întâilor");
+ checkSimpleReplaceRule("A ajuns al douăzecelea.", "douăzecilea");
+ checkSimpleReplaceRule("A ajuns al zecilea.", "zecelea");
+ checkSimpleReplaceRule("A primit jumate de litru de lapte.", "jumătate");
+
+ // multiple words / compounds
+ // space-delimited
+ checkSimpleReplaceRule("aqua forte", "acvaforte");
+ checkSimpleReplaceRule("aqua forte.", "acvaforte");
+ checkSimpleReplaceRule("A folosit «aqua forte».", "acvaforte");
+ checkSimpleReplaceRule("Aqua forte.", "Acvaforte");
+ checkSimpleReplaceRule("este aqua forte", "acvaforte");
+ checkSimpleReplaceRule("este aqua forte.", "acvaforte");
+ checkSimpleReplaceRule("este Aqua Forte.", "Acvaforte");
+ checkSimpleReplaceRule("este AquA Forte.", "Acvaforte");
+ checkSimpleReplaceRule("A primit jumate de litru de lapte și este aqua forte.", "jumătate", "acvaforte");
+ checkSimpleReplaceRule("du-te vino", "du-te-vino");
+ // dash-delimited
+ checkSimpleReplaceRule("cou-boi", "cowboy");
+ checkSimpleReplaceRule("cow-boy", "cowboy");
+ checkSimpleReplaceRule("cau-boi", "cowboy");
+ checkSimpleReplaceRule("Cau-boi", "Cowboy");
+ checkSimpleReplaceRule("cowboy"); // correct, no replacement
+ checkSimpleReplaceRule("Iată un cau-boi", "cowboy");
+ checkSimpleReplaceRule("Iată un cau-boi.", "cowboy");
+ checkSimpleReplaceRule("Iată un (cau-boi).", "cowboy");
+ checkSimpleReplaceRule("văcar=cau-boi", "cowboy");
+
+
+ // multiple suggestions
+ checkSimpleReplaceRule("A fost adăogită o altă regulă.", "adăugită/adăugată");
+ checkSimpleReplaceRule("A venit adinioarea.", "adineaori/adineauri");
+
+ // words with multiple wrong forms
+ checkSimpleReplaceRule("A pus axterix.", "asterisc");
+ checkSimpleReplaceRule("A pus axterics.", "asterisc");
+ checkSimpleReplaceRule("A pus asterics.", "asterisc");
+ }
+
+ /**
+ * Check if a specific replace rule applies.
+ *
+ * @param sentence
+ * the sentence containing the incorrect/misspelled word.
+ * @param words
+ * the words that are correct (the suggested replacement). Use "/" to separate multiple forms.
+ * @throws IOException
+ */
+ private void checkSimpleReplaceRule(String sentence, String... words)
+ throws IOException {
+ RuleMatch[] matches;
+ matches = rule.match(langTool.getAnalyzedSentence(sentence));
+ assertEquals("Invalid matches.length while checking sentence: "
+ + sentence, words.length, matches.length);
+ for (int i = 0; i < words.length; i++) {
+ String word = words[i];
+ String[] replacements = word.split("\\/");
+ assertEquals("Invalid replacement count wile checking sentence: "
+ + sentence, replacements.length, matches[i].getSuggestedReplacements().size());
+ for (int j = 0; j < replacements.length; j++) {
+ assertEquals("Invalid suggested replacement while checking sentence: "
+ + sentence, replacements[j], matches[i].getSuggestedReplacements().get(j));
+ }
+ }
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRuleTest.java
new file mode 100644
index 0000000..7b293e2
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRuleTest.java
@@ -0,0 +1,55 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.ru;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Tests {@link RuSimpleReplaceRule} against one accepted and one rejected Russian sentence.
+ *
+ * @author Yakov Reztsov
+ */
+
+
+public class RuSimpleReplaceRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    final RuSimpleReplaceRule replaceRule = new RuSimpleReplaceRule(TestTools.getMessages("ru"));
+    final JLanguageTool russianTool = new JLanguageTool(Language.RUSSIAN);
+
+    // accepted sentence: the rule reports nothing
+    final RuleMatch[] clean =
+        replaceRule.match(russianTool.getAnalyzedSentence("Книга была порвана."));
+    assertEquals(0, clean.length);
+
+    // rejected sentence: exactly one match carrying exactly one suggestion
+    final RuleMatch[] flagged =
+        replaceRule.match(russianTool.getAnalyzedSentence("Книга была порвата."));
+    assertEquals(1, flagged.length);
+    assertEquals(1, flagged[0].getSuggestedReplacements().size());
+    assertEquals("порвана", flagged[0].getSuggestedReplacements().get(0));
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianCompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianCompoundRuleTest.java
new file mode 100644
index 0000000..a4552f8
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianCompoundRuleTest.java
@@ -0,0 +1,62 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ru;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.CompoundRuleTestAbs;
+
+/**
+ * Russian Compound rule test,
+ * based on the German Compound rule test by Daniel Naber.
+ *
+ * @author Yakov Reztsov
+ */
+public class RussianCompoundRuleTest extends CompoundRuleTestAbs {
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ langTool = new JLanguageTool(Language.RUSSIAN);
+ rule = new RussianCompoundRule(null);
+ }
+ // NOTE(review): check() is inherited; its first argument is presumably the expected match count - confirm in CompoundRuleTestAbs.
+ public void testRule() throws IOException {
+ // correct sentences:
+ check(0, "Он вышел из-за дома.");
+ // hyphenated spelling, expected to pass:
+ check(0, "естественно-научный");
+ // incorrect sentences:
+ check(1, "из за", new String[]{"из-за"});
+ check(1, "нет нет из за да да");
+ //FIXME: suggestions / longest match
+ check(1, "Ростов на Дону", new String[]{"Ростов-на-Дону"});
+ // some compounds are suggested as one word, without a hyphen:
+ check(1, "кругло суточный", new String[]{"круглосуточный"});
+ // also accept incorrect upper/lowercase spelling:
+ check(1, "Ростов на дону", new String[]{"Ростов-на-дону"});
+ // also detect an error if only some of the hyphens are missing:
+ check(1, "Ростов-на Дону", new String[]{"Ростов-на-Дону"});
+ // first part is a short word:
+ check(0, "во-первых");
+ check(1, "во первых", new String[]{"во-первых"});
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRuleTest.java
new file mode 100644
index 0000000..8375efc
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRuleTest.java
@@ -0,0 +1,56 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.ru;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import junit.framework.TestCase;
+
+public class RussianUnpairedBracketsRuleTest extends TestCase {
+
+  // NOTE(review): the name says "Polish" although the rule and every sentence are Russian; kept so the test id stays stable.
+  public void testRulePolish() throws IOException {
+    final RussianUnpairedBracketsRule bracketsRule =
+        new RussianUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.RUSSIAN);
+    final JLanguageTool russianTool = new JLanguageTool(Language.RUSSIAN);
+    RuleMatch[] found;
+
+    // paired brackets and quotes: nothing may be reported
+    found = bracketsRule.match(russianTool.getAnalyzedSentence(
+        "(О жене и детях не беспокойся, я беру их на свои руки)."));
+    assertEquals(0, found.length);
+    found = bracketsRule.match(russianTool.getAnalyzedSentence(
+        "Позже выходит другая «южная поэма» «Бахчисарайский фонтан» (1824)."));
+    assertEquals(0, found.length);
+    found = bracketsRule.match(russianTool.getAnalyzedSentence("А \"б\" Д."));
+    assertEquals(0, found.length);
+    // a lone ")" after short labels such as "а)" must not be reported either
+    found = bracketsRule.match(russianTool.getAnalyzedSentence("а), б), Д)..., ДД), аа) и 1а)"));
+    assertEquals(0, found.length);
+    // the opening guillemet is never closed: exactly one match
+    found = bracketsRule.match(russianTool.getAnalyzedSentence(
+        "В таком ключе был начат в мае 1823 в Кишинёве роман в стихах «Евгений Онегин."));
+    assertEquals(1, found.length);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/PunctuationCheckRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/PunctuationCheckRuleTest.java
new file mode 100644
index 0000000..e1f2fd0
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/PunctuationCheckRuleTest.java
@@ -0,0 +1,72 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.uk;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+public class PunctuationCheckRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    final PunctuationCheckRule punctuationRule = new PunctuationCheckRule(TestTools.getEnglishMessages());
+
+    final JLanguageTool ukrainianTool = new JLanguageTool(Language.UKRAINIAN);
+    RuleMatch[] found;
+
+    // accepted: comma, full stop, colon and a run of exclamation marks
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("Дві, коми. Ось: дві!!!"));
+    assertEquals(0, found.length);
+
+    // accepted: leading dash and "?!!"
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("- Це ваша пряма мова?!!"));
+    assertEquals(0, found.length);
+
+    // accepted: ",-" and "!.."
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("Дві,- коми!.."));
+    assertEquals(0, found.length);
+
+    // accepted: ignored for now - not our concern :)
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("Два пробіли."));
+    assertEquals(0, found.length);
+
+    // rejected: doubled full stop, a single "." is the only suggestion
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("Дві крапки.."));
+    assertEquals(1, found.length);
+    assertEquals(1, found[0].getSuggestedReplacements().size());
+    assertEquals(".", found[0].getSuggestedReplacements().get(0));
+
+    // rejected: doubled comma
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("Дві,, коми."));
+    assertEquals(1, found.length);
+
+    // rejected: space before the comma instead of after it
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("Не там ,кома."));
+    assertEquals(1, found.length);
+
+    // rejected: colon directly followed by a dash
+    found = punctuationRule.match(ukrainianTool.getAnalyzedSentence("Двокрапка:- з тире."));
+    assertEquals(1, found.length);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/SimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/SimpleReplaceRuleTest.java
new file mode 100644
index 0000000..98822af
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/SimpleReplaceRuleTest.java
@@ -0,0 +1,49 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.uk;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+
+public class SimpleReplaceRuleTest extends TestCase {
+
+  public void testRule() throws IOException {
+    final SimpleReplaceRule replaceRule = new SimpleReplaceRule(TestTools.getEnglishMessages());
+    final JLanguageTool ukrainianTool = new JLanguageTool(Language.UKRAINIAN);
+
+    // sentence using "збігатися": nothing is reported
+    final RuleMatch[] clean =
+        replaceRule.match(ukrainianTool.getAnalyzedSentence("Ці рядки повинні збігатися."));
+    assertEquals(0, clean.length);
+
+    // sentence using "співпадати": one match, with "збігатися" as the only suggestion
+    final RuleMatch[] flagged =
+        replaceRule.match(ukrainianTool.getAnalyzedSentence("Ці рядки повинні співпадати."));
+    assertEquals(1, flagged.length);
+    assertEquals(1, flagged[0].getSuggestedReplacements().size());
+    assertEquals("збігатися", flagged[0].getSuggestedReplacements().get(0));
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/server/HTTPServerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/server/HTTPServerTest.java
new file mode 100644
index 0000000..1255bcd
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/server/HTTPServerTest.java
@@ -0,0 +1,117 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.server;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.XMLValidator;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/** Starts a local {@link HTTPServer} and checks its XML answers to GET and POST requests. */
+public class HTTPServerTest extends TestCase {
+
+  public void testHTTPServer() {
+    HTTPServer server = new HTTPServer();
+    try {
+      server.run();
+      // no error:
+      String enc = "UTF-8";
+      assertEquals("<?xml version=\"1.0\" encoding=\""+enc+"\"?>\n<matches>\n</matches>\n", check(Language.GERMAN, ""));
+      assertEquals("<?xml version=\"1.0\" encoding=\""+enc+"\"?>\n<matches>\n</matches>\n", check(Language.GERMAN, "Ein kleiner test"));
+      // one error:
+      assertTrue(check(Language.GERMAN, "ein kleiner test").indexOf("UPPERCASE_SENTENCE_START") != -1);
+      // two errors:
+      String result = check(Language.GERMAN, "ein kleiner test. Und wieder Erwarten noch was: \u00f6\u00e4\u00fc\u00df.");
+      assertTrue(result.indexOf("UPPERCASE_SENTENCE_START") != -1);
+      assertTrue(result.indexOf("WIEDER_WILLEN") != -1);
+      assertTrue("Expected special chars, got: '" + result+ "'", result.indexOf("\u00f6\u00e4\u00fc\u00df") != -1); // special chars are intact
+      XMLValidator validator = new XMLValidator();
+      validator.validateXMLString(result, JLanguageTool.getDataBroker().getResourceDir() + "/api-output.dtd", "matches");
+      validator.checkSimpleXMLString(result);
+      // make sure XML chars are escaped in the result to avoid invalid XML and XSS attacks:
+      assertTrue(check(Language.GERMAN, "bla <script>").indexOf("<script>") == -1);
+
+      // other tests for special characters
+      String germanSpecialChars = check(Language.GERMAN, "ein kleiner test. Und wieder Erwarten noch was: öäüß öäüß.");
+      assertTrue("Expected special chars, got: '" + germanSpecialChars+ "'", germanSpecialChars.contains("öäüß"));
+      String romanianSpecialChars = check(Language.ROMANIAN, "bla bla șțîâă șțîâă și câteva caractere speciale");
+      assertTrue("Expected special chars, got: '" + romanianSpecialChars+ "'", romanianSpecialChars.contains("șțîâă"));
+      String polishSpecialChars = check(Language.POLISH, "Mówiła długo, żeby tylko mówić mówić długo.");
+      assertTrue("Expected special chars, got: '" + polishSpecialChars+ "'", polishSpecialChars.contains("mówić"));
+      // test http POST
+      assertTrue(checkByPOST(Language.ROMANIAN, "greșit greșit").indexOf("greșit") != -1);
+      // test supported language listing
+      URL url = new URL("http://localhost:" + HTTPServer.DEFAULT_PORT + "/Languages");
+      String languagesXML = StringTools.streamToString((InputStream)url.getContent());
+      if (!languagesXML.contains("Romanian") || !languagesXML.contains("English")) {
+        fail("Error getting supported languages: " + languagesXML);
+      }
+      // tests for "&" character
+      assertTrue(check(Language.ENGLISH, "Me & you you").contains("&"));
+      // tests for mother tongue (copy from link {@link FalseFriendRuleTest})
+      assertTrue(check(Language.ENGLISH, Language.GERMAN, "We will berate you").indexOf("BERATE") != -1);
+      assertTrue(check(Language.GERMAN, Language.ENGLISH, "Man sollte ihn nicht so beraten.").indexOf("BERATE") != -1);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    } finally {
+      server.stop();
+    }
+  }
+
+  private String check(Language lang, String text) throws IOException {
+    return check(lang, null, text);
+  }
+
+  /** Sends {@code text} by HTTP GET; the {@code motherTongue} parameter is appended only when it is non-null. */
+  private String check(Language lang, Language motherTongue, String text) throws IOException {
+    String urlOptions = "/?language=" + lang.getShortName();
+    urlOptions += "&text=" + URLEncoder.encode(text, "UTF-8"); // latin1 is not enough for languages like polish, romanian, etc
+    if (null != motherTongue) {
+      urlOptions += "&motherTongue="+motherTongue.getShortName();
+    }
+    URL url = new URL("http://localhost:" + HTTPServer.DEFAULT_PORT + urlOptions);
+    InputStream stream = (InputStream)url.getContent();
+    return StringTools.streamToString(stream);
+  }
+
+  /** Same as {@link #check(Language, String)} but using HTTP POST method instead of GET. */
+  private String checkByPOST(Language lang, String text) throws IOException {
+    String postData = "language=" + lang.getShortName();
+    postData += "&text=" + URLEncoder.encode(text, "UTF-8"); // latin1 is not enough for languages like polish, romanian, etc
+    URLConnection connection = new URL("http://localhost:" + HTTPServer.DEFAULT_PORT).openConnection();
+    connection.setDoOutput(true);
+    OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
+    // close (and thereby flush) the request body before reading the response, even if the write fails
+    try {
+      wr.write(postData);
+    } finally {
+      wr.close();
+    }
+    return StringTools.streamToString(connection.getInputStream());
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizerTest.java
new file mode 100644
index 0000000..987038e
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizerTest.java
@@ -0,0 +1,51 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.synthesis.en;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+
+public class EnglishSynthesizerTest extends TestCase {
+
+  /** Builds a token that passes {@code tokenStr} for all three {@link AnalyzedToken} constructor arguments. */
+  private AnalyzedToken dummyToken(String tokenStr) {
+    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
+  }
+
+  public final void testSynthesizeStringString() throws IOException {
+    EnglishSynthesizer synth = new EnglishSynthesizer();
+    assertEquals(0, synth.synthesize(dummyToken("blablabla"), "blablabla").length); // expected value first
+
+    assertEquals("[were, was]", Arrays.toString(synth.synthesize(dummyToken("be"), "VBD")));
+    assertEquals("[presidents]", Arrays.toString(synth.synthesize(dummyToken("president"), "NNS")));
+    assertEquals("[tested]", Arrays.toString(synth.synthesize(dummyToken("test"), "VBD")));
+    assertEquals("[tested]", Arrays.toString(synth.synthesize(dummyToken("test"), "VBD", false)));
+    //with regular expressions
+    assertEquals("[tested]", Arrays.toString(synth.synthesize(dummyToken("test"), "VBD", true)));
+    assertEquals("[tested, testing]", Arrays.toString(synth.synthesize(dummyToken("test"), "VBD|VBG", true)));
+    //with special indefinite article
+    assertEquals("[a university, the university]", Arrays.toString(synth.synthesize(dummyToken("university"), "+DT", false)));
+    assertEquals("[an hour, the hour]", Arrays.toString(synth.synthesize(dummyToken("hour"), "+DT", false)));
+    assertEquals("[an hour]", Arrays.toString(synth.synthesize(dummyToken("hour"), "+INDT", false)));
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizerTest.java
new file mode 100644
index 0000000..4558d75
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizerTest.java
@@ -0,0 +1,46 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.synthesis.es;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+
+public class SpanishSynthesizerTest extends TestCase {
+  /** Builds a token that passes {@code tokenStr} for all three {@link AnalyzedToken} constructor arguments. */
+  private AnalyzedToken dummyToken(String tokenStr) {
+    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
+  }
+  public final void testSynthesizeStringString() throws IOException {
+    SpanishSynthesizer synth = new SpanishSynthesizer();
+    assertEquals(0, synth.synthesize(dummyToken("blablabla"), "blablabla").length); // expected value first
+
+    assertEquals("[temiera, temiese]", Arrays.toString(synth.synthesize(dummyToken("temer"), "VMSI3S0")));
+    assertEquals("[presidentes]", Arrays.toString(synth.synthesize(dummyToken("presidente"), "NCMP000")));
+    assertEquals("[contéis]", Arrays.toString(synth.synthesize(dummyToken("contar"), "VMSP2P0")));
+    assertEquals("[probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM")));
+    assertEquals("[probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM", false)));
+    //with regular expressions
+    assertEquals("[probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM", true)));
+    assertEquals("[probando, probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM|VMG0000", true)));
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizerTest.java
new file mode 100644
index 0000000..8f206c1
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizerTest.java
@@ -0,0 +1,46 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.synthesis.nl;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+
+public class DutchSynthesizerTest extends TestCase {
+
+  /** Builds a token that passes {@code tokenStr} for all three {@link AnalyzedToken} constructor arguments. */
+  private AnalyzedToken dummyToken(String tokenStr) {
+    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
+  }
+
+  public final void testSynthesizeStringString() throws IOException {
+    DutchSynthesizer synth = new DutchSynthesizer();
+    assertEquals(0, synth.synthesize(dummyToken("blablabla"), "blablabla").length); // expected value first
+
+    assertEquals("[zwommen]", Arrays.toString(synth.synthesize(dummyToken("zwemmen"), "VBh")));
+    assertEquals("[Afro-Surinamers]", Arrays.toString(synth.synthesize(dummyToken("Afro-Surinamer"), "NN2")));
+    //with regular expressions
+    assertEquals("[hebt, heeft]", Arrays.toString(synth.synthesize(dummyToken("hebben"), "VB3", true)));
+    assertEquals("[doorgeseind]", Arrays.toString(synth.synthesize(dummyToken("doorseinen"), "VBp", true)));
+    assertEquals("[doorsein, doorseint, doorseinden, doorseinde, doorseinen, doorgeseind, doorgeseinde]", Arrays.toString(synth.synthesize(dummyToken("doorseinen"), "VB.*", true)));
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizerTest.java
new file mode 100644
index 0000000..00092d0
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizerTest.java
@@ -0,0 +1,49 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.synthesis.pl;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+
+public class PolishSynthesizerTest extends TestCase {
+  /** Builds a token that passes {@code tokenStr} for all three {@link AnalyzedToken} constructor arguments. */
+  private AnalyzedToken dummyToken(String tokenStr) {
+    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
+  }
+
+  public final void testSynthesizeString() throws IOException {
+    PolishSynthesizer synth = new PolishSynthesizer();
+    assertEquals(0, synth.synthesize(dummyToken("blablabla"), "blablabla").length); // expected value first
+
+    assertEquals("[Aaru]", Arrays.toString(synth.synthesize(dummyToken("Aar"), "subst:sg:gen:m3")));
+    assertEquals("[Abchazem]", Arrays.toString(synth.synthesize(dummyToken("Abchaz"), "subst:sg:inst:m1")));
+    assertEquals("[nieduży]", Arrays.toString(synth.synthesize(dummyToken("duży"), "adj:sg:nom:m:pos:neg")));
+    assertEquals("[miała]", Arrays.toString(synth.synthesize(dummyToken("mieć"), "verb:praet:sg:ter:f:perf")));
+    assertEquals("[brzydziej]", Arrays.toString(synth.synthesize(dummyToken("brzydko"), "adv:comp")));
+    //with regular expressions
+    assertEquals("[tonera]", Arrays.toString(synth.synthesize(dummyToken("toner"), ".*sg.*[\\.:]gen.*", true)));
+    assertEquals("[niedużego, nieduży, niedużemu, niedużego, niedużym, nieduży, nieduży]", Arrays.toString(synth.synthesize(dummyToken("duży"), "adj:sg.*(m[0-9]?|m.n):pos:neg", true)));
+    assertEquals("[miałabym, miałbym, miałabyś, miałbyś, miałaby, miałby, miałoby, miałam, miałem, miałaś, miałeś, miała, miał, miało]",
+        Arrays.toString(synth.synthesize(dummyToken("mieć"), ".*praet:sg.*", true)));
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizerTest.java
new file mode 100644
index 0000000..1cfc572
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizerTest.java
@@ -0,0 +1,83 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.synthesis.ro;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+
+public class RomanianSynthesizerTest extends TestCase {
+
+  private AnalyzedToken dummyToken(String tokenStr) {
+    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
+  }
+
+  /**
+   * Synthesizes Romanian verb and noun forms for exact tags and for a tag regular expression.
+   * @author Ionuț Păduraru
+   * @since 08.03.2009 18:44:25
+   * @throws IOException if the synthesizer fails with an I/O error
+   */
+  public final void testSynthesizeStringString() throws IOException {
+    RomanianSynthesizer synth = new RomanianSynthesizer();
+    assertEquals(0, synth.synthesize(dummyToken("blablabla"), "blablabla").length); // expected value first
+
+    // verb "a alege" (to choose)
+    // infinitive form
+    assertEquals("[alege]", Arrays.toString(synth.synthesize(
+        dummyToken("alege"), "V000000f00")));
+    // subjunctive, second person plural
+    assertEquals("[alegeți]", Arrays.toString(synth.synthesize(
+        dummyToken("alege"), "V0p2000cz0")));
+
+    // verb "a fi" (to be)
+    assertEquals("[fi]", Arrays.toString(synth.synthesize(
+        dummyToken("fi"), "V000000f0f")));
+    // present indicative, second person plural
+    assertEquals("[sunteți]", Arrays.toString(synth.synthesize(
+        dummyToken("fi"), "V0p2000izf")));
+    // present indicative, third person plural
+    assertEquals("[sunt]", Arrays.toString(synth.synthesize(
+        dummyToken("fi"), "V0p3000izf")));
+    // present indicative, first person singular (the tag is V0s1..., not plural)
+    assertEquals("[sunt]", Arrays.toString(synth.synthesize(
+        dummyToken("fi"), "V0s1000izf")));
+    // RegExp
+    // present indicative, second person plural OR present indicative, third person plural
+    assertEquals("[sunteți, sunt]", Arrays.toString(synth.synthesize(
+        dummyToken("fi"), "V0p2000izf|V0p3000izf", true)));
+
+    // miscellaneous
+    // indicative, pluperfect, first person plural
+    assertEquals("[merseserăm]", Arrays.toString(synth.synthesize(
+        dummyToken("merge"), "V0p1000im0")));
+    // indicative, pluperfect, first person singular
+    assertEquals("[mersesem]", Arrays.toString(synth.synthesize(
+        dummyToken("merge"), "V0s1000im0")));
+    assertEquals("[legătura]", Arrays.toString(synth.synthesize(
+        dummyToken("legătură"), "Sfs3aac000")));
+    assertEquals("[legătură]", Arrays.toString(synth.synthesize(
+        dummyToken("legătură"), "Sfs3anc000")));
+
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizerTest.java
new file mode 100644
index 0000000..4530597
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizerTest.java
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.synthesis.sk;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+
+/**
+ * Tests for {@link SlovakSynthesizer}: synthesis of word forms from a lemma
+ * and a POS tag, with the tag given either literally or as a regular expression.
+ */
+public class SlovakSynthesizerTest extends TestCase {
+
+  /** Creates a token whose three constructor arguments are all {@code tokenStr}. */
+  private final AnalyzedToken dummyToken(String tokenStr) {
+    return new AnalyzedToken(tokenStr, tokenStr, tokenStr);
+  }
+
+  /**
+   * Checks that an unknown word produces no forms, that a literal tag produces
+   * the expected form, and that a regular-expression tag produces all matching forms.
+   *
+   * @throws IOException propagated from {@code synthesize}
+   */
+  public final void testSynthesizeStringString() throws IOException {
+    SlovakSynthesizer synth = new SlovakSynthesizer();
+    // JUnit's assertEquals takes (expected, actual); keeping that order makes
+    // a failure report the values the right way round.
+    assertEquals(0, synth.synthesize(dummyToken("blablabla"),
+        "blablabla").length);
+
+    assertEquals("[časopisu]", Arrays.toString(synth.synthesize(dummyToken("časopis"), "SSis2")));
+    //with regular expressions
+    assertEquals("[časopisy, časopisov, časopisom, časopisy, časopisy, časopisoch, časopismi, časopis, časopisu, časopisu, časopis, časopis, časopise, časopisom]", Arrays.toString(synth.synthesize(dummyToken("časopis"), "SS.*", true)));
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ManualTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ManualTaggerTest.java
new file mode 100644
index 0000000..432e8da
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ManualTaggerTest.java
@@ -0,0 +1,48 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import de.danielnaber.languagetool.JLanguageTool;
+
+import junit.framework.TestCase;
+
+/**
+ * Exercises {@link ManualTagger} lookups against the dictionary stored under
+ * {@code /de/added.txt} in the resource directory.
+ *
+ * @author Daniel Naber
+ */
+public class ManualTaggerTest extends TestCase {
+
+  private static final String MANUAL_DICT_FILENAME = "/de/added.txt";
+
+  /**
+   * Looks up unknown, known and differently-cased words and compares the
+   * returned arrays with their expected string rendering.
+   */
+  public void testManualTagger() throws IOException {
+    final ManualTagger manualTagger = new ManualTagger(
+        JLanguageTool.getDataBroker().getFromResourceDirAsStream(MANUAL_DICT_FILENAME));
+
+    // words that are not listed yield null
+    assertNull(manualTagger.lookup(""));
+    assertNull(manualTagger.lookup("gibtsnicht"));
+
+    final String trotzReadings = Arrays.toString(manualTagger.lookup("Trotz"));
+    assertEquals("[Trotz, SUB:NOM:SIN:MAS]", trotzReadings);
+    // lookup is case sensitive:
+    assertNull(manualTagger.lookup("trotz"));
+
+    final String interessenReadings = Arrays.toString(manualTagger.lookup("Interessen"));
+    assertEquals("[Interesse, SUB:NOM:PLU:NEU, Interesse, SUB:AKK:PLU:NEU, Interesse, SUB:GEN:PLU:NEU]",
+        interessenReadings);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ca/CatalanTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ca/CatalanTaggerTest.java
new file mode 100644
index 0000000..95b37ce
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ca/CatalanTaggerTest.java
@@ -0,0 +1,60 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ca;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link CatalanTagger}: a scan of its dictionary for entries
+ * without a POS tag, and a tagging check on sample text.
+ */
+public class CatalanTaggerTest extends TestCase {
+
+  private CatalanTagger tagger;
+  private WordTokenizer tokenizer;
+
+  public void setUp() {
+    tagger = new CatalanTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  /**
+   * Walks over every dictionary entry and prints a warning to stderr for each
+   * one whose tag is null or empty. The test itself never fails on such entries.
+   */
+  public void testDictionary() throws IOException {
+    final Dictionary dict = Dictionary.read(
+        this.getClass().getResource(tagger.getFileName()));
+    final DictionaryLookup lookup = new DictionaryLookup(dict);
+    for (WordData entry : lookup) {
+      final boolean hasTag = entry.getTag() != null && entry.getTag().length() != 0;
+      if (hasTag) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  /**
+   * Compares the tagger output for a Catalan sentence and for an unknown word
+   * with the expected token/[lemma]tag rendering.
+   */
+  public void testTagger() throws IOException {
+    final String sentence = "Sóc un home molt honrat.";
+    final String expectedTags =
+        "Sóc/[ser]VSIP1S0 -- un/[un]DI0MS0|un/[un]PI0MS000 -- home/[home]I|home/[home]NCMS000 -- molt/[molt]DI0MS0|molt/[molt]PI0MS000|molt/[molt]RG -- honrat/[honrar]VMP00SM";
+    TestTools.myAssert(sentence, expectedTags, tokenizer, tagger);
+//  Need to fix the separator character: al - a+el+SP+DA
+//  TestTools.myAssert("Frase recitada al matí.",
+//      "Frase/[frase]NCFS000 -- recitada/[recitar]VMP00SF -- al/[a]el+SP+DA -- matí/[matar]VMIS1S0|[matí]NCMS000", tokenizer, tagger);
+    TestTools.myAssert("blablabla","blablabla/[null]null", tokenizer, tagger);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/cs/CzechTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/cs/CzechTaggerTest.java
new file mode 100644
index 0000000..7aa2e9b
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/cs/CzechTaggerTest.java
@@ -0,0 +1,59 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.cs;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link CzechTagger}: tagging of short Czech inputs is compared
+ * with exact expected strings.
+ */
+public class CzechTaggerTest extends TestCase {
+
+ private CzechTagger tagger;
+ private WordTokenizer tokenizer;
+
+ // Creates a fresh tagger and tokenizer for each test.
+ public void setUp() {
+ tagger = new CzechTagger();
+ tokenizer = new WordTokenizer();
+ }
+
+// Disabled dictionary scan: it would print a warning for every entry
+// whose POS tag is null or empty.
+/* public void testDictionary() throws IOException {
+ final Dictionary dictionary = Dictionary.read(
+ this.getClass().getResource(tagger.getFileName()));
+ final DictionaryLookup dl = new DictionaryLookup(dictionary);
+ for (WordData wd : dl) {
+ if (wd.getTag() == null || wd.getTag().length() == 0) {
+ System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary.");
+ }
+ }
+ }*/
+
+ /**
+ * Checks the readings produced for two Czech inputs and for an unknown word.
+ * In the expected strings each reading is written token/[lemma]tag, readings
+ * of one token are joined by '|' and tokens by " -- ".
+ */
+ public void testTagger() throws IOException {
+ // The two sentence-level checks below are disabled.
+ //TestTools.myAssert("Ukončuje větu rozkazovací či zvolací.", "Ukončuje/[ukončovat]k5eAaImIp3nS větu/[věta]k1gFnSc4 rozkazovací/[rozkazovací]k2eAgFnPc1d1 či/[či]k8 zvolací/[zvolací]k2eAgFnPc1d1", tokenizer, tagger);
+ //TestTools.myAssert("Nahrazuje vynechané písmeno, používá se pro zkracování letopočtů.", "Nahrazuje/[nahrazovat]k5eAaImIp3nS vynechané/[vynechaný]k2eAgFnPc1d1 písmeno/[písmeno]k1gNnSc1|písmeno/[písmena]k1gFnSc5 používá/[používat]k5eAaImIp3nS se/[se]k3c4 pro/[pro]k7 zkracování/[zkracování]k1gNnPc1 letopočtů/[letopočet]k1gInPc2", tokenizer, tagger);
+
+ TestTools.myAssert("Nejkratší věta.",
+ "Nejkratší/[krátký]k2eAgFnPc1d3|Nejkratší/[krátký]k2eAgFnPc4d3|Nejkratší/[krátký]k2eAgFnPc5d3|Nejkratší/[krátký]k2eAgFnSc1d3|Nejkratší/[krátký]k2eAgFnSc2d3|Nejkratší/[krátký]k2eAgFnSc3d3|Nejkratší/[krátký]k2eAgFnSc4d3|Nejkratší/[krátký]k2eAgFnSc5d3|Nejkratší/[krátký]k2eAgFnSc6d3|Nejkratší/[krátký]k2eAgFnSc7d3|Nejkratší/[krátký]k2eAgInPc1d3|Nejkratší/[krátký]k2eAgInPc4d3|Nejkratší/[krátký]k2eAgInPc5d3|Nejkratší/[krátký]k2eAgInSc1d3|Nejkratší/[krátký]k2eAgInSc4d3|Nejkratší/[krátký]k2eAgInSc5d3|Nejkratší/[krátký]k2eAgMnPc1d3|Nejkratší/[krátký]k2eAgMnPc4d3|Nejkratší/[krátký]k2eAgMnPc5d3|Nejkratší/[krátký]k2eAgMnSc1d3|Nejkratší/[krátký]k2eAgMnSc5d3|Nejkratší/[krátký]k2eAgNnPc1d3|Nejkratší/[krátký]k2eAgNnPc4d3|Nejkratší/[krátký]k2eAgNnPc5d3|Nejkratší/[krátký]k2eAgNnSc1d3|Nejkratší/[krátký]k2eAgNnSc4d3|Nejkratší/[krátký]k2eAgNnSc5d3 -- věta/[věta]k1gFnSc1", tokenizer, tagger);
+ TestTools.myAssert("zvolací.",
+ "zvolací/[zvolací]k2eAgFnPc1d1|zvolací/[zvolací]k2eAgFnPc4d1|zvolací/[zvolací]k2eAgFnPc5d1|zvolací/[zvolací]k2eAgFnSc1d1|zvolací/[zvolací]k2eAgFnSc2d1|zvolací/[zvolací]k2eAgFnSc3d1|zvolací/[zvolací]k2eAgFnSc4d1|zvolací/[zvolací]k2eAgFnSc5d1|zvolací/[zvolací]k2eAgFnSc6d1|zvolací/[zvolací]k2eAgFnSc7d1|zvolací/[zvolací]k2eAgInPc1d1|zvolací/[zvolací]k2eAgInPc4d1|zvolací/[zvolací]k2eAgInPc5d1|zvolací/[zvolací]k2eAgInSc1d1|zvolací/[zvolací]k2eAgInSc4d1|zvolací/[zvolací]k2eAgInSc5d1|zvolací/[zvolací]k2eAgMnPc1d1|zvolací/[zvolací]k2eAgMnPc4d1|zvolací/[zvolací]k2eAgMnPc5d1|zvolací/[zvolací]k2eAgMnSc1d1|zvolací/[zvolací]k2eAgMnSc5d1|zvolací/[zvolací]k2eAgNnPc1d1|zvolací/[zvolací]k2eAgNnPc4d1|zvolací/[zvolací]k2eAgNnPc5d1|zvolací/[zvolací]k2eAgNnSc1d1|zvolací/[zvolací]k2eAgNnSc4d1|zvolací/[zvolací]k2eAgNnSc5d1", tokenizer, tagger);
+ // An unknown word is expected to come back with a null lemma and a null tag.
+ TestTools.myAssert("blablabla", "blablabla/[null]null", tokenizer, tagger);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/de/GermanTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/de/GermanTaggerTest.java
new file mode 100644
index 0000000..7bf3c14
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/de/GermanTaggerTest.java
@@ -0,0 +1,117 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.de;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+
+import junit.framework.TestCase;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+/**
+ * Tests for {@link GermanTagger}: single-word lookups (tags and lemmas) and a
+ * scan of the German dictionary for entries without a POS tag.
+ *
+ * @author Daniel Naber
+ */
+public class GermanTaggerTest extends TestCase {
+
+ /**
+ * Looks up individual words and checks both the sorted tag rendering and the
+ * lemma of the returned readings. Lookups expected to find nothing must
+ * return null.
+ */
+ public void testTagger() throws IOException {
+ GermanTagger tagger = new GermanTagger();
+ AnalyzedGermanTokenReadings aToken = tagger.lookup("Haus");
+ assertEquals("Haus[SUB:AKK:SIN:NEU, SUB:DAT:SIN:NEU, SUB:NOM:SIN:NEU]", aToken.toSortedString());
+ assertEquals("Haus", aToken.getReadings().get(0).getLemma());
+ assertEquals("Haus", aToken.getReadings().get(1).getLemma());
+ assertEquals("Haus", aToken.getReadings().get(2).getLemma());
+
+ aToken = tagger.lookup("Hauses");
+ assertEquals("Hauses[SUB:GEN:SIN:NEU]", aToken.toSortedString());
+ assertEquals("Haus", aToken.getReadings().get(0).getLemma());
+
+ // the lowercase variant of the noun form is expected to be unknown
+ aToken = tagger.lookup("hauses");
+ assertNull(aToken);
+
+ // the capitalised variant of the adjective is expected to be unknown
+ aToken = tagger.lookup("Groß");
+ assertNull(aToken);
+
+ aToken = tagger.lookup("großer");
+ assertEquals("großer[ADJ:DAT:SIN:FEM:GRU:SOL, ADJ:GEN:PLU:FEM:GRU:SOL, " +
+ "ADJ:GEN:PLU:MAS:GRU:SOL, ADJ:GEN:PLU:NEU:GRU:SOL, " +
+ "ADJ:GEN:SIN:FEM:GRU:SOL, ADJ:NOM:SIN:MAS:GRU:IND, ADJ:NOM:SIN:MAS:GRU:SOL]", aToken.toSortedString());
+ assertEquals("groß", aToken.getReadings().get(0).getLemma());
+
+ // from both german.dict and added.txt:
+ aToken = tagger.lookup("Interessen");
+ assertEquals("Interessen[SUB:AKK:PLU:NEU, SUB:DAT:PLU:NEU, SUB:GEN:PLU:NEU, SUB:NOM:PLU:NEU]",
+ aToken.toSortedString());
+ assertEquals("Interesse", aToken.getReadings().get(0).getLemma());
+ assertEquals("Interesse", aToken.getReadings().get(1).getLemma());
+ assertEquals("Interesse", aToken.getReadings().get(2).getLemma());
+ assertEquals("Interesse", aToken.getReadings().get(3).getLemma());
+
+ // words that are not in the dictionary but that are recognized thanks to noun splitting:
+ aToken = tagger.lookup("Donaudampfschiff");
+ assertEquals("Donaudampfschiff[SUB:AKK:SIN:NEU, SUB:DAT:SIN:NEU, SUB:NOM:SIN:NEU]",
+ aToken.toSortedString());
+ // NOTE(review): three readings are expected above but only the first two lemmas are checked.
+ assertEquals("Donaudampfschiff", aToken.getReadings().get(0).getLemma());
+ assertEquals("Donaudampfschiff", aToken.getReadings().get(1).getLemma());
+
+ aToken = tagger.lookup("Häuserkämpfe");
+ assertEquals("Häuserkämpfe[SUB:AKK:PLU:MAS, SUB:GEN:PLU:MAS, SUB:NOM:PLU:MAS]",
+ aToken.toSortedString());
+ assertEquals("Häuserkampf", aToken.getReadings().get(0).getLemma());
+ assertEquals("Häuserkampf", aToken.getReadings().get(1).getLemma());
+ assertEquals("Häuserkampf", aToken.getReadings().get(2).getLemma());
+
+ aToken = tagger.lookup("Häuserkampfes");
+ assertEquals("Häuserkampfes[SUB:GEN:SIN:MAS]", aToken.toSortedString());
+ assertEquals("Häuserkampf", aToken.getReadings().get(0).getLemma());
+
+ aToken = tagger.lookup("Häuserkampfs");
+ assertEquals("Häuserkampfs[SUB:GEN:SIN:MAS]", aToken.toSortedString());
+ assertEquals("Häuserkampf", aToken.getReadings().get(0).getLemma());
+
+ aToken = tagger.lookup("Lieblingsfarben");
+ assertEquals("Lieblingsfarben[SUB:AKK:PLU:FEM, SUB:DAT:PLU:FEM, SUB:GEN:PLU:FEM, " +
+ "SUB:NOM:PLU:FEM]", aToken.toSortedString());
+ assertEquals("Lieblingsfarbe", aToken.getReadings().get(0).getLemma());
+
+ aToken = tagger.lookup("Autolieblingsfarben");
+ assertEquals("Autolieblingsfarben[SUB:AKK:PLU:FEM, SUB:DAT:PLU:FEM, SUB:GEN:PLU:FEM, " +
+ "SUB:NOM:PLU:FEM]", aToken.toSortedString());
+ assertEquals("Autolieblingsfarbe", aToken.getReadings().get(0).getLemma());
+
+ aToken = tagger.lookup("übrigbleibst");
+ assertEquals("übrigbleibst[VER:2:SIN:PRÄ:NON:NEB]", aToken.toSortedString());
+ assertEquals("übrigbleiben", aToken.getReadings().get(0).getLemma());
+ }
+
+ /**
+ * Iterates over every entry of /de/german.dict and prints a warning to stderr
+ * for each entry whose tag is null or empty. Such entries do not fail the test.
+ */
+ public void testDictionary() throws IOException {
+ final Dictionary dictionary = Dictionary.read(
+ JLanguageTool.getDataBroker().getFromResourceDirAsUrl("/de/german.dict"));
+ final DictionaryLookup dl = new DictionaryLookup(dictionary);
+ for (WordData wd : dl) {
+ if (wd.getTag() == null || wd.getTag().length() == 0) {
+ System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem()
+ + " lacks a POS tag in the dictionary.");
+ }
+ }
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunkerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunkerTest.java
new file mode 100644
index 0000000..24f83f8
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunkerTest.java
@@ -0,0 +1,62 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.pl;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tagging.pl.PolishTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link PolishChunker}: Polish sentences are tokenized, tagged and
+ * disambiguated, and the result is compared with exact expected strings.
+ */
+public class PolishChunkerTest extends TestCase {
+
+ private PolishTagger tagger;
+ private WordTokenizer tokenizer;
+ private SentenceTokenizer sentenceTokenizer;
+ private PolishChunker disambiguator;
+
+ // Creates fresh components for each test; the sentence tokenizer is the SRX one for "pl".
+ public void setUp() {
+ tagger = new PolishTagger();
+ tokenizer = new WordTokenizer();
+ sentenceTokenizer = new SRXSentenceTokenizer("pl");
+ disambiguator = new PolishChunker();
+ }
+
+ /**
+ * Checks the chunker output for several inputs. In the expected strings each
+ * reading is written token/[lemma]tag and readings of one token are joined
+ * by '|'. Readings such as &lt;ELLIPSIS&gt; ... &lt;/ELLIPSIS&gt; appear on
+ * the first and last token of a multi-token expression.
+ */
+ public void testChunker() throws IOException {
+ // The first three checks are disabled.
+ //TestTools.myAssert("To jest duży dom.", "/[null]SENT_START To/[to]conj|To/[ten]adj:sg:nom.acc.voc:n1.n2 /[null]null jest/[być]verb:fin:sg:ter:imperf /[null]null duży/[duży]adj:sg:nom:m:pneg /[null]null dom/[dom]subst:sg:nom.acc:m3 ./[null]SENT_END", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ //TestTools.myAssert("Krowa pasie się na pastwisku.", "/[null]SENT_START Krowa/[krowa]subst:sg:nom:f /[null]null pasie/[pas]subst:sg:loc.voc:m3|pasie/[paść]verb:irreg /[null]null się/[siebie]qub /[null]null na/[na]prep:acc.loc /[null]null pastwisku/[pastwisko]subst:sg:dat:n+subst:sg:loc:n ./[null]SENT_END", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ //TestTools.myAssert("blablabla","/[null]SENT_START blablabla/[null]SENT_END", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ // ellipsis in the middle of a sentence
+ TestTools.myAssert("To test... dezambiguacji",
+ "/[null]SENT_START To/[ten]adj:sg:acc.nom.voc:n:pos|To/[to]conj /[null]null test/[test]subst:sg:acc.nom:m3|test/[testo]subst:pl:gen:n ./[...]<ELLIPSIS> ./[null]null ./[...]</ELLIPSIS> /[null]null dezambiguacji/[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ // two-word expression "to znaczy"
+ TestTools.myAssert("On, to znaczy premier, jest niezbyt mądry",
+ "/[null]SENT_START On/[on]ppron3:sg:nom:m:ter ,/[null]null /[null]null to/[ten]adj:sg:acc.nom.voc:n:pos|to/[to znaczy]<TO_ZNACZY>|to/[to]conj /[null]null znaczy/[to znaczy]</TO_ZNACZY>|znaczy/[znaczyć]verb:fin:sg:ter:imperf /[null]null premier/[premier]subst:sg:nom:m1|premier/[premiera]subst:pl:gen:f ,/[null]null /[null]null jest/[być]verb:fin:sg:ter:imperf /[null]null niezbyt/[zbyt]adv:neg /[null]null mądry/[mądry]adj:sg:acc:m3:pos|mądry/[mądry]adj:sg:acc:m3:pos:aff|mądry/[mądry]adj:sg:nom:m:pos|mądry/[mądry]adj:sg:nom:m:pos:aff|mądry/[mądry]adj:sg:voc:m1.m2:pos|mądry/[mądry]adj:sg:voc:m1.m2:pos:aff", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ // three-word expression "z uwagi na"
+ TestTools.myAssert("Lubię go z uwagi na krótkie włosy.",
+ "/[null]SENT_START Lubię/[lubić]verb:fin:sg:pri:imperf /[null]null go/[on]ppron3:sg:acc:m:ter:nakc:npraep|go/[on]ppron3:sg:gen:m.n.n1.n2:ter:nakc:npraep /[null]null z/[z uwagi na]<PREP:ACC>|z/[z]prep:gen.inst /[null]null uwagi/[uwaga]subst:pl:acc.gen.nom.voc:f|uwagi/[uwaga]subst:sg:dat.gen.loc:f /[null]null na/[na]prep:acc.loc|na/[z uwagi na]</PREP:ACC> /[null]null krótkie/[krótki]adj:pl:acc.nom.voc:f.m2.m3.n:pos:aff|krótkie/[krótki]adj:sg:acc.nom.voc:n:pos:aff /[null]null włosy/[włos]subst:pl:acc.nom.voc:m3|włosy/[włosy]subst:pltant:acc.nom.voc:n ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ // ellipsis at the end of the input, without and with a trailing space
+ TestTools.myAssert("Test...",
+ "/[null]SENT_START Test/[test]subst:sg:acc.nom:m3|Test/[testo]subst:pl:gen:n ./[...]<ELLIPSIS> ./[null]null ./[...]</ELLIPSIS>", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ TestTools.myAssert("Test... ",
+ "/[null]SENT_START Test/[test]subst:sg:acc.nom:m3|Test/[testo]subst:pl:gen:n ./[...]<ELLIPSIS> ./[null]null ./[...]</ELLIPSIS> /[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+ }
+
+ }
+
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleTest.java
new file mode 100644
index 0000000..90af21d
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleTest.java
@@ -0,0 +1,236 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.xx.TrimDisambiguator;
+
+public class DisambiguationRuleTest extends TestCase {
+
+ private static JLanguageTool langTool;
+
+ /**
+  * Creates the shared English {@link JLanguageTool} the first time it is needed;
+  * later calls reuse the instance kept in the static field.
+  */
+ @Override
+ public void setUp() throws IOException {
+   if (langTool != null) {
+     return;
+   }
+   langTool = new JLanguageTool(Language.ENGLISH);
+ }
+
+ /**
+ * JUnit entry point: runs the disambiguation rule examples of every language,
+ * ignoring none and without progress output.
+ */
+ public void testDisambiguationRulesFromXML() throws IOException,
+ ParserConfigurationException, SAXException {
+ testDisambiguationRulesFromXML(null, false);
+ }
+
+ /**
+  * Loads {@code disambiguation.xml} for every language in
+  * {@link Language#LANGUAGES} and runs the examples of its rules.
+  * Languages whose disambiguator is a {@link DemoDisambiguator} or a
+  * {@link TrimDisambiguator} are skipped.
+  *
+  * @param ignoredLanguages languages to leave out, or {@code null} for none
+  * @param verbose whether to print a progress line per language to stdout
+  */
+ private void testDisambiguationRulesFromXML(
+     final Set<Language> ignoredLanguages, final boolean verbose)
+     throws IOException, ParserConfigurationException, SAXException {
+   for (final Language lang : Language.LANGUAGES) {
+     final boolean ignored = ignoredLanguages != null && ignoredLanguages.contains(lang);
+     if (verbose) {
+       System.out.println(ignored
+           ? "Ignoring tests for " + lang.getName()
+           : "Running tests for " + lang.getName() + "...");
+     }
+     if (ignored) {
+       continue;
+     }
+     final DisambiguationRuleLoader ruleLoader = new DisambiguationRuleLoader();
+     final JLanguageTool languageTool = new JLanguageTool(lang);
+     // nothing to test for the placeholder disambiguators
+     if (languageTool.getLanguage().getDisambiguator() instanceof DemoDisambiguator
+         || languageTool.getLanguage().getDisambiguator() instanceof TrimDisambiguator) {
+       continue;
+     }
+     final String name = JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortName()
+         + "/disambiguation.xml";
+     final List<DisambiguationPatternRule> rules = ruleLoader
+         .getRules(ruleLoader.getClass().getResourceAsStream(name));
+     testDisambiguationRulesFromXML(rules, languageTool, lang);
+   }
+ }
+
+ static String combine(String[] s, String glue) {
+ int k=s.length;
+ if (k==0)
+ return null;
+ StringBuilder out=new StringBuilder();
+ out.append(s[0]);
+ for (int x=1;x<k;++x)
+ out.append(glue).append(s[x]);
+ return out.toString();
+ }
+
+
+ static String sortForms(final String wordForms) {
+ if (",[,]".equals(wordForms)) {
+ return wordForms;
+ }
+ String word = wordForms.substring(0, wordForms.indexOf('[') + 1);
+ String forms = wordForms.substring(wordForms.indexOf('[')
+ + 1, wordForms.length() -1);
+ String[] formToSort = forms.split(",");
+ Arrays.sort(formToSort);
+ return word +
+ combine(formToSort, ",")
+ + "]";
+ }
+
+ /**
+  * Checks the examples embedded in each rule against the analysis produced by
+  * {@code languageTool}.
+  * <p>
+  * For every rule, the rules that precede it in {@code rules} are applied
+  * first (see {@code disambiguateUntil}). Then:
+  * <ul>
+  * <li>each "untouched" example must be left unchanged by the rule;</li>
+  * <li>each disambiguated example must be changed by the rule, and the token
+  * at the {@code <marker>} position must read as the example's ambiguous form
+  * before the rule is applied and as its disambiguated form afterwards.</li>
+  * </ul>
+  *
+  * @param rules the rules to test, in the order in which they are applied
+  * @param languageTool produces the raw analysis of the example sentences
+  * @param lang the language under test; only used in a failure message
+  * @throws IOException propagated from sentence analysis or rule application
+  */
+ private void testDisambiguationRulesFromXML(
+     final List<DisambiguationPatternRule> rules,
+     final JLanguageTool languageTool, final Language lang) throws IOException {
+   for (final DisambiguationPatternRule rule : rules) {
+     final String id = rule.getId();
+     if (rule.getUntouchedExamples() != null) {
+       final List<String> goodSentences = rule.getUntouchedExamples();
+       for (String goodSentence : goodSentences) {
+         // enable indentation use
+         goodSentence = goodSentence.replaceAll("[\\n\\t]+", "");
+         goodSentence = cleanXML(goodSentence);
+
+         assertTrue(goodSentence.trim().length() > 0);
+         final AnalyzedSentence sent = disambiguateUntil(rules, id,
+             languageTool.getRawAnalyzedSentence(goodSentence));
+         assertTrue("The untouched example for rule " + id + " was touched!",
+             sent.equals(rule.replace(sent)));
+       }
+     }
+     final List<DisambiguatedExample> examples = rule.getExamples();
+     if (examples != null) {
+       for (final DisambiguatedExample example : examples) {
+
+         final String outputForms = example.getDisambiguated();
+         assertTrue("No output form found for: " + id, outputForms != null);
+         assertTrue(outputForms.trim().length() > 0);
+         final int expectedMatchStart = example.getExample().indexOf(
+             "<marker>");
+         final int closingMarkerPos = example.getExample().indexOf("</marker>");
+         // Test the raw indexOf() results: once the tag length has been
+         // subtracted, a missing closing tag no longer shows up as -1.
+         if (expectedMatchStart == -1 || closingMarkerPos == -1) {
+           fail(lang
+               + ": No position markup ('<marker>...</marker>') in disambiguated example in rule "
+               + rule);
+         }
+         // End position in the sentence once the opening tag has been removed.
+         final int expectedMatchEnd = closingMarkerPos - "<marker>".length();
+         final String inputForms = example.getAmbiguous();
+         assertTrue("No input form found for: " + id, inputForms != null);
+         assertTrue(inputForms.trim().length() > 0);
+         assertTrue("Input and output forms for rule " + id + " are the same!",
+             !outputForms.equals(inputForms));
+         // The sentence is analysed anew for each of the three values below,
+         // so that none of them shares state with another.
+         final AnalyzedSentence cleanInput = languageTool
+             .getRawAnalyzedSentence(cleanXML(example.getExample()));
+         final AnalyzedSentence sent = disambiguateUntil(rules, id,
+             languageTool
+                 .getRawAnalyzedSentence(cleanXML(example.getExample())));
+         final AnalyzedSentence disambiguatedSent = rule
+             .replace(disambiguateUntil(rules, id, languageTool
+                 .getRawAnalyzedSentence(cleanXML(example.getExample()))));
+         assertTrue(
+             "Disambiguated sentence is equal to the non-disambiguated sentence for rule :"
+                 + id, !cleanInput.equals(disambiguatedSent));
+         assertTrue(
+             "Disambiguated sentence is equal to the input sentence for rule :"
+                 + id, !sent.equals(disambiguatedSent));
+         // Reading of the marked token before the rule is applied.
+         String reading = "";
+         for (final AnalyzedTokenReadings readings : sent.getTokens()) {
+           if (readings.isSentStart() && inputForms.indexOf("<S>") == -1) {
+             continue;
+           }
+           if (readings.getStartPos() == expectedMatchStart) {
+             final AnalyzedTokenReadings r[] = { readings };
+             reading = new AnalyzedSentence(r).toString();
+             assertTrue(
+                 "Wrong marker position in the example for the rule " + id,
+                 readings.getStartPos() == expectedMatchStart
+                     && readings.getStartPos() + readings.getToken().length() == expectedMatchEnd);
+             break;
+           }
+         }
+         assertTrue("The input form for the rule " + id + " in the example: "
+             + example.toString() + " is different than expected (expected "
+             + inputForms + " but got " + sortForms(reading) + ").", sortForms(reading)
+             .equals(inputForms));
+         // Reading of the same token after the rule has been applied.
+         for (final AnalyzedTokenReadings readings : disambiguatedSent
+             .getTokens()) {
+           if (readings.isSentStart() && outputForms.indexOf("<S>") == -1) {
+             continue;
+           }
+           if (readings.getStartPos() == expectedMatchStart) {
+             final AnalyzedTokenReadings r[] = { readings };
+             reading = new AnalyzedSentence(r).toString();
+             assertTrue(readings.getStartPos() == expectedMatchStart
+                 && readings.getStartPos() + readings.getToken().length() == expectedMatchEnd);
+             break;
+           }
+         }
+         assertTrue("The output form for the rule " + id + " in the example: "
+             + example.toString() + " is different than expected (expected "
+             + outputForms + " but got " + sortForms(reading) + ").", sortForms(reading)
+             .equals(outputForms));
+       }
+     }
+   }
+ }
+
+ /**
+  * Applies the rules of {@code rules}, in list order, to {@code sentence} and
+  * stops just before the first rule whose id equals {@code ruleID}.
+  * Useful for testing the rule cascade.
+  *
+  * @return the sentence as left by the rules that precede {@code ruleID}
+  */
+ private static AnalyzedSentence disambiguateUntil(
+     final List<DisambiguationPatternRule> rules, final String ruleID,
+     final AnalyzedSentence sentence) throws IOException {
+   AnalyzedSentence current = sentence;
+   for (final DisambiguationPatternRule candidate : rules) {
+     if (ruleID.equals(candidate.getId())) {
+       return current;
+     }
+     current = candidate.replace(current);
+   }
+   return current;
+ }
+
+ private static String cleanXML(final String str) {
+ return str.replaceAll("<.*?>", "");
+ }
+
+ /**
+  * Runs the disambiguation rule tests from the command line, as a help for
+  * rule developers who are not programmers.
+  *
+  * @param args not used
+  * @throws IOException propagated from the test run
+  * @throws SAXException propagated from the test run
+  * @throws ParserConfigurationException propagated from the test run
+  */
+ public static void main(final String[] args) throws IOException, ParserConfigurationException, SAXException {
+   System.out.println("Running disambiguator rule tests...");
+   final DisambiguationRuleTest ruleTest = new DisambiguationRuleTest();
+   ruleTest.setUp();
+   ruleTest.testDisambiguationRulesFromXML();
+   System.out.println("Tests successful.");
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguatorTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguatorTest.java
new file mode 100644
index 0000000..81439bc
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguatorTest.java
@@ -0,0 +1,70 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules.en;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator;
+import de.danielnaber.languagetool.tagging.en.EnglishTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+public class EnglishRuleDisambiguatorTest extends TestCase {
+  // Pipeline shared by all assertions; disamb2 (DemoDisambiguator) supplies the comparison output.
+  private EnglishTagger tagger;
+  private WordTokenizer tokenizer;
+  private SentenceTokenizer sentenceTokenizer;
+  private EnglishRuleDisambiguator disambiguator;
+  private DemoDisambiguator disamb2;
+
+  public void setUp() {
+    tagger = new EnglishTagger();
+    tokenizer = new WordTokenizer();
+    sentenceTokenizer = new SentenceTokenizer();
+    disambiguator = new EnglishRuleDisambiguator();
+    disamb2 = new DemoDisambiguator();
+  }
+
+  // Each sentence is analysed twice: first with the English rules, then with disamb2 for comparison.
+  public void testChunker() throws IOException {
+    // "have" after a modal: the rules keep only VB, disamb2 keeps NN|VB|VBP.
+    final String cannotHave = "I cannot have it.";
+    TestTools.myAssert(cannotHave,
+        "/[null]SENT_START I/[I]PRP /[null]null cannot/[can]MD /[null]null have/[have]VB /[null]null it/[it]PRP ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(cannotHave,
+        "/[null]SENT_START I/[I]PRP /[null]null cannot/[can]MD /[null]null have/[have]NN|have/[have]VB|have/[have]VBP /[null]null it/[it]PRP ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+    // "blame" after "to": the rules keep only VB, disamb2 keeps JJ|NN:UN|VB|VBP.
+    final String toBlame = "He is to blame.";
+    TestTools.myAssert(toBlame,
+        "/[null]SENT_START He/[he]PRP /[null]null is/[be]VBZ /[null]null to/[to]IN|to/[to]TO /[null]null blame/[blame]VB ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(toBlame,
+        "/[null]SENT_START He/[he]PRP /[null]null is/[be]VBZ /[null]null to/[to]IN|to/[to]TO /[null]null blame/[blame]JJ|blame/[blame]NN:UN|blame/[blame]VB|blame/[blame]VBP ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+    // "well known": the rules reduce the pair to RB + JJ, disamb2 leaves both tokens ambiguous.
+    final String wellKnown = "He is well known.";
+    TestTools.myAssert(wellKnown,
+        "/[null]SENT_START He/[he]PRP /[null]null is/[be]VBZ /[null]null well/[well]RB /[null]null known/[known]JJ ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(wellKnown,
+        "/[null]SENT_START He/[he]PRP /[null]null is/[be]VBZ /[null]null well/[well]NN|well/[well]RB|well/[well]UH|well/[well]VB|well/[well]VBP /[null]null known/[know]VBN|known/[known]NN ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+  }
+}
+
+
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
new file mode 100644
index 0000000..e64ff60
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java
@@ -0,0 +1,81 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules.fr;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator;
+import de.danielnaber.languagetool.tagging.fr.FrenchTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+public class FrenchRuleDisambiguatorTest extends TestCase {
+  private FrenchTagger tagger;
+  private WordTokenizer tokenizer;
+  private SentenceTokenizer sentenceTokenizer;
+  private FrenchRuleDisambiguator disambiguator;
+  private DemoDisambiguator disamb2;
+
+  public void setUp() {
+    tagger = new FrenchTagger();
+    tokenizer = new WordTokenizer();
+    sentenceTokenizer = new SentenceTokenizer();
+    disambiguator = new FrenchRuleDisambiguator();
+    disamb2 = new DemoDisambiguator();
+  }
+
+  // Each sentence is analysed twice: first with the French rules, then with disamb2 (DemoDisambiguator) for comparison.
+  public void testChunker() throws IOException {
+    // Negation: the rules tag "ne" and "pas" as A and keep only the "être" reading of "suis".
+    final String pasLaSeule = "Je ne suis pas la seule.";
+    TestTools.myAssert(pasLaSeule,
+        "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null ne/[null]A /[null]null suis/[être]V etre ind pres 1 s /[null]null pas/[pas]A /[null]null la/[le]D f s /[null]null seule/[seul]J f s ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(pasLaSeule,
+        "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null ne/[null]null /[null]null suis/[suivre]V imp pres 2 s|suis/[suivre]V ind pres 1 s|suis/[suivre]V ind pres 2 s|suis/[être]V etre ind pres 1 s /[null]null pas/[pas]N f sp|pas/[pas]N m sp /[null]null la/[la]N m sp|la/[la]R pers obj 3 f s|la/[le]D f s /[null]null seule/[seul]D f s|seule/[seul]J f s|seule/[seul]N f s ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+    // Auxiliary + participle: "a" resolves to avoir, "publié" to V ppa, "son livre" to D + N.
+    final String publieSonLivre = "Il a enfin publié son livre.";
+    TestTools.myAssert(publieSonLivre,
+        "/[null]SENT_START Il/[il]R pers suj 3 m s /[null]null a/[avoir]V avoir ind pres 3 s /[null]null enfin/[enfin]A /[null]null publié/[publier]V ppa m s /[null]null son/[son]D e s /[null]null livre/[livre]N e s ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(publieSonLivre,
+        "/[null]SENT_START Il/[il]R pers suj 3 m s /[null]null a/[a]N m sp|a/[avoir]V avoir ind pres 3 s /[null]null enfin/[enfin]A /[null]null publié/[publier]V ppa m s|publié/[publié]J m s /[null]null son/[son]D m s|son/[son]N m s /[null]null livre/[livre]N e s|livre/[livrer]V imp pres 2 s|livre/[livrer]V ind pres 1 s|livre/[livrer]V ind pres 3 s|livre/[livrer]V sub pres 1 s|livre/[livrer]V sub pres 3 s ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+    // "danse" is reduced to V ind pres 1 s and "les" to D; "toutes" stays ambiguous in both outputs.
+    final String danseAuClub = "Je danse toutes les semaines au club.";
+    TestTools.myAssert(danseAuClub,
+        "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null danse/[danser]V ind pres 1 s /[null]null toutes/[tous]R f p|toutes/[tout]D f p|toutes/[touter]V ind pres 2 s|toutes/[touter]V sub pres 2 s /[null]null les/[le]D e p /[null]null semaines/[semaine]N f p /[null]null au/[au]D m s /[null]null club/[club]N m s ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(danseAuClub,
+        "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null danse/[danse]N f s|danse/[danser]V imp pres 2 s|danse/[danser]V ind pres 1 s|danse/[danser]V ind pres 3 s|danse/[danser]V sub pres 1 s|danse/[danser]V sub pres 3 s /[null]null toutes/[tous]R f p|toutes/[tout]D f p|toutes/[touter]V ind pres 2 s|toutes/[touter]V sub pres 2 s /[null]null les/[le]D e p|les/[les]R pers obj 3 p /[null]null semaines/[semaine]N f p /[null]null au/[au]D m s /[null]null club/[club]N m s ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+    // Elided pronoun: "j" resolves to the pronoun, "étais"/"jouais" to 1 s, "petit" to J.
+    final String quandPetit = "Quand j'étais petit, je jouais au football.";
+    TestTools.myAssert(quandPetit,
+        "/[null]SENT_START Quand/[quand]C sub /[null]null j/[je]R pers suj 1 s '/[null]null étais/[être]V etre ind impa 1 s /[null]null petit/[petit]J m s ,/[null]null /[null]null je/[je]R pers suj 1 s /[null]null jouais/[jouer]V ind impa 1 s /[null]null au/[au]D m s /[null]null football/[football]N m s ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(quandPetit,
+        "/[null]SENT_START Quand/[quand]C sub /[null]null j/[j]N m sp|j/[je]R pers suj 1 s '/[null]null étais/[étai]N m p|étais/[être]V etre ind impa 1 s|étais/[être]V etre ind impa 2 s /[null]null petit/[petit]J m s|petit/[petit]N m s ,/[null]null /[null]null je/[je]R pers suj 1 s /[null]null jouais/[jouer]V ind impa 1 s|jouais/[jouer]V ind impa 2 s /[null]null au/[au]D m s /[null]null football/[football]N m s ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+    // Copula + adjective: "suis" resolves to être and "petite" to J f s.
+    final String suisPetite = "Je suis petite.";
+    TestTools.myAssert(suisPetite,
+        "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null suis/[être]V etre ind pres 1 s /[null]null petite/[petit]J f s ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator);
+    TestTools.myAssert(suisPetite,
+        "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null suis/[suivre]V imp pres 2 s|suis/[suivre]V ind pres 1 s|suis/[suivre]V ind pres 2 s|suis/[être]V etre ind pres 1 s /[null]null petite/[petit]J f s|petite/[petit]N f s ./[null]null", tokenizer, sentenceTokenizer, tagger, disamb2);
+  }
+}
+
+
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguatorTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguatorTest.java
new file mode 100644
index 0000000..9e28e54
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguatorTest.java
@@ -0,0 +1,89 @@
+package de.danielnaber.languagetool.tagging.disambiguation.rules.ro;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator;
+import de.danielnaber.languagetool.tagging.ro.RomanianTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.ro.RomanianWordTokenizer;
+
+public class RomanianRuleDisambiguatorTest extends TestCase {
+
+  private RomanianTagger tagger;
+  private RomanianWordTokenizer tokenizer;
+  private SentenceTokenizer sentenceTokenizer;
+  private RomanianRuleDisambiguator disambiguator;
+  private DemoDisambiguator disamb2;
+
+  public void setUp() {
+    tagger = new RomanianTagger();
+    tokenizer = new RomanianWordTokenizer();
+    sentenceTokenizer = new SentenceTokenizer();
+    disambiguator = new RomanianRuleDisambiguator();
+    disamb2 = new DemoDisambiguator();
+  }
+
+  // "care": the five readings reported with disamb2 (DemoDisambiguator) are cut down
+  // to care/[care]N000a0l000 by the Romanian rules.
+  public void testCare1() throws IOException {
+    final String sentence = "Persoana care face treabă.";
+    // Comparison output: "care" keeps all five readings.
+    TestTools.myAssert(sentence,
+        "/[null]SENT_START Persoana/[persoană]Sfs3aac000 /[null]null care/[car]Snp3anc000|care/[care]0000000000|care/[care]N000a0l000|care/[căra]V0p3000cz0|care/[căra]V0s3000cz0 /[null]null face/[face]V000000f00|face/[face]V0s3000iz0 /[null]null treabă/[treabă]Sfs3anc000 ./[null]null",
+        tokenizer, sentenceTokenizer, tagger, disamb2);
+    // With the rules: a single reading is left for "care".
+    TestTools.myAssert(sentence,
+        "/[null]SENT_START Persoana/[persoană]Sfs3aac000 /[null]null care/[care]N000a0l000 /[null]null face/[face]V000000f00|face/[face]V0s3000iz0 /[null]null treabă/[treabă]Sfs3anc000 ./[null]null",
+        tokenizer, sentenceTokenizer, tagger, disambiguator);
+  }
+
+  // "masă" after "este o": disamb2 reports masă/[masa]V0s3000is0 and masă/[masă]Sfs3anc000;
+  // the rules keep only masă/[masă]Sfs3anc000, with or without a final period.
+  // The readings of "o" are the same in every expected output.
+  public void testEsteO() throws IOException {
+    final String sentence = "este o masă.";
+    // Comparison output: both readings of "masă" are present.
+    TestTools.myAssert(sentence,
+        "/[null]SENT_START este/[fi]V0s3000izb /[null]null o/[o]Dfs3a0t000|o/[o]I00000o000|o/[o]Nfs3a0p00c|o/[o]Sms3anc000|o/[vrea]V0s3000iov /[null]null masă/[masa]V0s3000is0|masă/[masă]Sfs3anc000 ./[null]null",
+        tokenizer, sentenceTokenizer, tagger, disamb2);
+    // With the rules: only masă/[masă]Sfs3anc000 remains.
+    TestTools.myAssert(sentence,
+        "/[null]SENT_START este/[fi]V0s3000izb /[null]null o/[o]Dfs3a0t000|o/[o]I00000o000|o/[o]Nfs3a0p00c|o/[o]Sms3anc000|o/[vrea]V0s3000iov /[null]null masă/[masă]Sfs3anc000 ./[null]null",
+        tokenizer, sentenceTokenizer, tagger, disambiguator);
+    // Same input without the final period.
+    TestTools.myAssert("este o masă",
+        "/[null]SENT_START este/[fi]V0s3000izb /[null]null o/[o]Dfs3a0t000|o/[o]I00000o000|o/[o]Nfs3a0p00c|o/[o]Sms3anc000|o/[vrea]V0s3000iov /[null]null masă/[masă]Sfs3anc000",
+        tokenizer, sentenceTokenizer, tagger, disambiguator);
+  }
+
+  // Verb after "vom"/"veți": disamb2 reports several verb readings for the second
+  // word, while the rules keep only its V000000f00 reading.
+  // The readings of "la" are not reduced.
+  public void testDezambiguizareVerb() throws IOException {
+    final String participa = "vom participa la";
+    final String culege = "vom culege";
+    // Comparison output: two readings for "participa".
+    TestTools.myAssert(participa,
+        "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null participa/[participa]V000000f00|participa/[participa]V0s3000ii0 /[null]null la/[la]P000000000|la/[la]Sms3anc000",
+        tokenizer, sentenceTokenizer, tagger, disamb2);
+    // With the rules: only V000000f00 is left for "participa".
+    TestTools.myAssert(participa,
+        "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null participa/[participa]V000000f00 /[null]null la/[la]P000000000|la/[la]Sms3anc000",
+        tokenizer, sentenceTokenizer, tagger, disambiguator);
+
+    // Comparison output: three readings for "culege".
+    TestTools.myAssert(culege,
+        "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null culege/[culege]V000000f00|culege/[culege]V0s2000m00|culege/[culege]V0s3000iz0",
+        tokenizer, sentenceTokenizer, tagger, disamb2);
+    // With the rules: only V000000f00 is left for "culege".
+    TestTools.myAssert(culege,
+        "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null culege/[culege]V000000f00",
+        tokenizer, sentenceTokenizer, tagger, disambiguator);
+    // Same expected reading after "veți".
+    TestTools.myAssert("veți culege",
+        "/[null]SENT_START veți/[vrea]V0p2000ivv /[null]null culege/[culege]V000000f00",
+        tokenizer, sentenceTokenizer, tagger, disambiguator);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/en/EnglishTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/en/EnglishTaggerTest.java
new file mode 100644
index 0000000..9828c1d
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/en/EnglishTaggerTest.java
@@ -0,0 +1,90 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.en;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import junit.framework.TestCase;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests the English tagger: dictionary integrity, POS tagging and lemmas, using the fixtures built in setUp().
+ * @author Daniel Naber
+ */
+public class EnglishTaggerTest extends TestCase {
+
+  private EnglishTagger tagger;
+  private WordTokenizer tokenizer;
+
+  public void setUp() {
+    tagger = new EnglishTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  // Every dictionary entry must carry a POS tag; the failure message names the offending entry.
+  public void testDictionary() throws IOException {
+    final Dictionary dictionary = Dictionary.read(
+        this.getClass().getResource(tagger.getFileName()));
+    for (final WordData wd : new DictionaryLookup(dictionary)) {
+      if (wd.getTag() == null) {
+        fail("The word " + wd.getWord() + "/" + wd.getStem() + " lacks a POS tag in the dictionary.");
+      }
+    }
+  }
+
+  public void testTagger() throws IOException {
+    TestTools.myAssert("This is a big house.",
+        "This/[this]DT|This/[this]PDT -- is/[be]VBZ -- a/[a]DT -- big/[big]JJ|big/[big]RB -- house/[house]NN|house/[house]VB|house/[house]VBP", tokenizer, tagger);
+    TestTools.myAssert("Marketing do a lot of trouble.",
+        "Marketing/[market]VBG|Marketing/[marketing]NN:U -- do/[do]VB|do/[do]VBP -- a/[a]DT -- lot/[lot]NN -- of/[of]IN -- trouble/[trouble]NN:UN|trouble/[trouble]VB|trouble/[trouble]VBP", tokenizer, tagger);
+    TestTools.myAssert("Manager use his laptop every day.",
+        "Manager/[manager]NN -- use/[use]NN:UN|use/[use]VB|use/[use]VBP -- his/[hi]NNS|his/[his]PRP$ -- laptop/[laptop]NN -- every/[every]DT -- day/[day]NN:UN", tokenizer, tagger);
+    TestTools.myAssert("This is a bigger house.",
+        "This/[this]DT|This/[this]PDT -- is/[be]VBZ -- a/[a]DT -- bigger/[big]JJR -- house/[house]NN|house/[house]VB|house/[house]VBP", tokenizer, tagger);
+    TestTools.myAssert("He doesn't believe me.",
+        "He/[he]PRP -- doesn/[do]VBZ -- t/[null]null -- believe/[believe]VB|believe/[believe]VBP -- me/[I]PRP", tokenizer, tagger);
+    TestTools.myAssert("It has become difficult.",
+        "It/[it]PRP -- has/[have]VBZ -- become/[become]VB|become/[become]VBN|become/[become]VBP -- difficult/[difficult]JJ", tokenizer, tagger);
+  }
+
+  public void testLemma() throws IOException {
+    final List<String> words = new ArrayList<String>();
+    words.add("Oliver");
+    words.add("works");
+    final List<AnalyzedTokenReadings> aToken = tagger.tag(words);
+    assertEquals(2, aToken.size());
+    assertEquals(3, aToken.get(0).getReadings().size());
+    assertEquals(2, aToken.get(1).getReadings().size());
+
+    assertEquals("Oliver", aToken.get(0).getReadings().get(0).getLemma());
+    // TODO: are the following two correct?
+    assertEquals("oliver", aToken.get(0).getReadings().get(1).getLemma());
+    assertEquals("olive", aToken.get(0).getReadings().get(2).getLemma());
+
+    assertEquals("work", aToken.get(1).getReadings().get(0).getLemma());
+    assertEquals("work", aToken.get(1).getReadings().get(1).getLemma());
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/eo/EsperantoTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/eo/EsperantoTaggerTest.java
new file mode 100644
index 0000000..b52ecc9
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/eo/EsperantoTaggerTest.java
@@ -0,0 +1,45 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.eo;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+public class EsperantoTaggerTest extends TestCase {
+  // Fixtures are rebuilt in setUp().
+  private EsperantoTagger tagger;
+  private WordTokenizer tokenizer;
+
+  public void setUp() {
+    tagger = new EsperantoTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  public void testTagger() throws IOException {
+    final String expected = "Tio/[null]T nak np t o -- estas/[esti]V nt as -- simpla/[simpla]A nak np -- testo/[testo]O nak np";
+    TestTools.myAssert("Tio estas simpla testo", expected, tokenizer, tagger);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/es/SpanishTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/es/SpanishTaggerTest.java
new file mode 100644
index 0000000..fd373a7
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/es/SpanishTaggerTest.java
@@ -0,0 +1,59 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.es;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+public class SpanishTaggerTest extends TestCase {
+  // Fixtures are rebuilt in setUp().
+  private SpanishTagger tagger;
+  private WordTokenizer tokenizer;
+
+  public void setUp() {
+    tagger = new SpanishTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  // Walks the whole tagger dictionary and warns on stderr about entries without a POS tag; it never fails.
+  public void testDictionary() throws IOException {
+    final Dictionary dict = Dictionary.read(getClass().getResource(tagger.getFileName()));
+    for (final WordData entry : new DictionaryLookup(dict)) {
+      if (entry.getTag() != null && entry.getTag().length() != 0) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  public void testTagger() throws IOException {
+    TestTools.myAssert("Soy un hombre muy honrado.",
+        "Soy/[ser]VSIP1S0 -- un/[uno]DI0MS0 -- hombre/[hombre]I|hombre/[hombre]NCMS000 -- muy/[muy]RG -- honrado/[honrar]VMP00SM", tokenizer, tagger);
+    TestTools.myAssert("Tengo que ir a mi casa.",
+        "Tengo/[tener]VMIP1S0 -- que/[que]CS|que/[que]PR0CN000 -- ir/[ir]VMN0000 -- a/[a]NCFS000|a/[a]SPS00 -- mi/[mi]DP1CSS|mi/[mi]NCMS000 -- casa/[casa]NCFS000|casa/[casar]VMIP3S0|casa/[casar]VMM02S0", tokenizer, tagger);
+    TestTools.myAssert("blablabla","blablabla/[null]null", tokenizer, tagger);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/fr/FrenchTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/fr/FrenchTaggerTest.java
new file mode 100644
index 0000000..f453891
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/fr/FrenchTaggerTest.java
@@ -0,0 +1,62 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.fr;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+public class FrenchTaggerTest extends TestCase {
+  // Fixtures are rebuilt in setUp().
+  private FrenchTagger tagger;
+  private WordTokenizer tokenizer;
+
+  public void setUp() {
+    tagger = new FrenchTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  // Walks the whole tagger dictionary and warns on stderr about entries without a POS tag; it never fails.
+  public void testDictionary() throws IOException {
+    final Dictionary dict = Dictionary.read(getClass().getResource(tagger.getFileName()));
+    for (final WordData entry : new DictionaryLookup(dict)) {
+      if (entry.getTag() != null && entry.getTag().length() != 0) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  public void testTagger() throws IOException {
+    TestTools.myAssert("C'est la vie.",
+        "C/[C]N m sp|C/[c]N m sp|C/[c]R dem e s -- est/[est]N m s|est/[être]V etre ind pres 3 s -- la/[la]N m sp|la/[la]R pers obj 3 f s|la/[le]D f s -- vie/[vie]N f s", tokenizer, tagger);
+    TestTools.myAssert("Je ne parle pas français.",
+        "Je/[je]R pers suj 1 s -- ne/[null]null -- parle/[parler]V imp pres 2 s|parle/[parler]V ind pres 1 s|parle/[parler]V ind pres 3 s|parle/[parler]V sub pres 1 s|parle/[parler]V sub pres 3 s -- pas/[pas]N f sp|pas/[pas]N m sp -- français/[français]J m sp|français/[français]N m sp", tokenizer, tagger);
+    TestTools.myAssert("blablabla","blablabla/[blablabla]N m s", tokenizer, tagger);
+    TestTools.myAssert("passagère","passagère/[passager]J f s|passagère/[passager]N f s", tokenizer, tagger);
+    // This token is expected back without lemma or tag.
+    TestTools.myAssert("non_existing_word","non_existing_word/[null]null", tokenizer, tagger);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/gl/GalicianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/gl/GalicianTaggerTest.java
new file mode 100644
index 0000000..563d7ce
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/gl/GalicianTaggerTest.java
@@ -0,0 +1,60 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.gl;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for the Galician tagger, based on the English tagger test.
+ * @author Susana Sotelo Docio
+ */
+public class GalicianTaggerTest extends TestCase {
+
+  private GalicianTagger tagger;
+  private WordTokenizer tokenizer;
+
+  public void setUp() {
+    tagger = new GalicianTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  // Walks the whole tagger dictionary and warns on stderr about entries without a POS tag; it never fails.
+  public void testDictionary() throws IOException {
+    final Dictionary dict = Dictionary.read(getClass().getResource(tagger.getFileName()));
+    for (final WordData entry : new DictionaryLookup(dict)) {
+      if (entry.getTag() != null && entry.getTag().length() != 0) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  public void testTagger() throws IOException {
+    final String expected = "Todo/[todo]DI0MS0|Todo/[todo]PI0MS000 -- vai/[ir]VMIP3S0|vai/[ir]VMM02S0 -- mudar/[mudar]VMN0000|mudar/[mudar]VMN01S0|mudar/[mudar]VMN03S0|mudar/[mudar]VMSF1S0|mudar/[mudar]VMSF3S0";
+    TestTools.myAssert("Todo vai mudar", expected, tokenizer, tagger);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/it/ItalianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/it/ItalianTaggerTest.java
new file mode 100644
index 0000000..eeb6ffd
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/it/ItalianTaggerTest.java
@@ -0,0 +1,60 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.it;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+public class ItalianTaggerTest extends TestCase {
+  // Fixtures are rebuilt in setUp().
+  private ItalianTagger tagger;
+  private WordTokenizer tokenizer;
+
+  public void setUp() {
+    tagger = new ItalianTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  // Walks the whole tagger dictionary and warns on stderr about entries without a POS tag; it never fails.
+  public void testDictionary() throws IOException {
+    final Dictionary dict = Dictionary.read(getClass().getResource(tagger.getFileName()));
+    for (final WordData entry : new DictionaryLookup(dict)) {
+      if (entry.getTag() != null && entry.getTag().length() != 0) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  public void testTagger() throws IOException {
+    TestTools.myAssert("Non c'è linguaggio senza inganno.",
+        "Non/[non]ADV -- c/[C]NPR -- è/[essere]AUX:ind+pres+3+s|è/[essere]VER:ind+pres+3+s -- linguaggio/[linguaggio]NOUN-M:s -- senza/[senza]CON|senza/[senza]PRE -- inganno/[ingannare]VER:ind+pres+1+s|inganno/[inganno]NOUN-M:s", tokenizer, tagger);
+    TestTools.myAssert("Amo quelli che desiderano l'impossibile.",
+        "Amo/[amare]VER:ind+pres+1+s -- quelli/[quelli]PRO-DEMO-M-P|quelli/[quello]DET-DEMO:m+p -- che/[che]CON|che/[che]DET-WH:f+p|che/[che]DET-WH:f+s|che/[che]DET-WH:m+p|che/[che]DET-WH:m+s|che/[che]WH-CHE -- desiderano/[desiderare]VER:ind+pres+3+p -- l/[null]null -- impossibile/[impossibile]ADJ:pos+f+s|impossibile/[impossibile]ADJ:pos+m+s", tokenizer, tagger);
+    // This token is expected back without lemma or tag.
+    TestTools.myAssert("blablabla", "blablabla/[null]null", tokenizer, tagger);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/nl/DutchTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/nl/DutchTaggerTest.java
new file mode 100644
index 0000000..c6c4322
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/nl/DutchTaggerTest.java
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.nl;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link DutchTagger}: a scan of its dictionary for entries
+ * without a POS tag, and a comparison of tagged sample input with fixed
+ * expected output.
+ */
+public class DutchTaggerTest extends TestCase {
+
+  private DutchTagger tagger;
+  private WordTokenizer tokenizer;
+
+  /** Creates a fresh tagger and word tokenizer before each test. */
+  public void setUp() {
+    tagger = new DutchTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  /**
+   * Walks over every entry of the dictionary file named by the tagger and
+   * prints a warning to stderr for each entry whose tag is null or empty.
+   * Missing tags are only reported; they do not make the test fail.
+   *
+   * @throws IOException declared because the dictionary is read from a resource
+   */
+  public void testDictionary() throws IOException {
+    final DictionaryLookup lookup = new DictionaryLookup(
+        Dictionary.read(this.getClass().getResource(tagger.getFileName())));
+    for (WordData entry : lookup) {
+      final boolean tagged = entry.getTag() != null && entry.getTag().length() != 0;
+      if (tagged) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  /**
+   * Hands a sample sentence and a single word, each with its expected
+   * "token/[lemma]POS" string, to {@code TestTools.myAssert} together with
+   * the tokenizer and tagger.
+   *
+   * @throws IOException declared by the helper being called
+   */
+  public void testTagger() throws IOException {
+    TestTools.myAssert(
+        "Dit is een Nederlandse zin om het programma'tje te testen.",
+        "Dit/[dit]DTh -- is/[zijn]VB3 -- een/[een]DTe|een/[een]NM|een/[een]NM1|een/[een]NN1d -- Nederlandse/[Nederlandse]NN1 -- zin/[zin]NN1d|zin/[zinnen]VB1 -- om/[om]PRom -- het/[het]DTh -- programma/[programma]NN1d|programma/[programma]NN1h -- tje/[null]null -- te/[te]PRte -- testen/[test]NN2|testen/[testen]VBi",
+        tokenizer, tagger);
+    TestTools.myAssert(
+        "zwijnden",
+        "zwijnden/[zwijnen]VBh",
+        tokenizer, tagger);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/pl/PolishTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/pl/PolishTaggerTest.java
new file mode 100644
index 0000000..d9ced96
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/pl/PolishTaggerTest.java
@@ -0,0 +1,60 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.pl;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link PolishTagger}: a scan of its dictionary for entries
+ * without a POS tag, and a comparison of tagged sample sentences with
+ * fixed expected output.
+ */
+public class PolishTaggerTest extends TestCase {
+
+  private PolishTagger tagger;
+  private WordTokenizer tokenizer;
+
+  /** Creates a fresh tagger and word tokenizer before each test. */
+  public void setUp() {
+    tagger = new PolishTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  /**
+   * Walks over every entry of the dictionary file named by the tagger and
+   * prints a warning to stderr for each entry whose tag is null or empty.
+   * Missing tags are only reported; they do not make the test fail.
+   *
+   * @throws IOException declared because the dictionary is read from a resource
+   */
+  public void testDictionary() throws IOException {
+    final DictionaryLookup lookup = new DictionaryLookup(
+        Dictionary.read(this.getClass().getResource(tagger.getFileName())));
+    for (WordData entry : lookup) {
+      final boolean tagged = entry.getTag() != null && entry.getTag().length() != 0;
+      if (tagged) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  /**
+   * Hands sample input and the expected "token/[lemma]POS" string to
+   * {@code TestTools.myAssert} together with the tokenizer and tagger.
+   * The last call uses a non-word, for which null lemma and tag are expected.
+   *
+   * @throws IOException declared by the helper being called
+   */
+  public void testTagger() throws IOException {
+    TestTools.myAssert(
+        "To jest duży dom.",
+        "To/[ten]adj:sg:acc.nom.voc:n:pos|To/[to]conj -- jest/[być]verb:fin:sg:ter:imperf -- duży/[duży]adj:sg:acc:m3:pos:aff|duży/[duży]adj:sg:nom:m:pos:aff|duży/[duży]adj:sg:voc:m1.m2:pos:aff -- dom/[dom]subst:sg:acc.nom:m3",
+        tokenizer, tagger);
+    TestTools.myAssert(
+        "Krowa pasie się na pastwisku.",
+        "Krowa/[krowa]subst:sg:nom:f -- pasie/[pas]subst:sg:loc.voc:m3 -- się/[siebie]qub -- na/[na]prep:acc.loc -- pastwisku/[pastwisko]subst:sg:dat.loc:n",
+        tokenizer, tagger);
+    TestTools.myAssert(
+        "blablabla",
+        "blablabla/[null]null",
+        tokenizer, tagger);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerDiacriticsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerDiacriticsTest.java
new file mode 100644
index 0000000..5d968af
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerDiacriticsTest.java
@@ -0,0 +1,97 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ro;
+
+/**
+ * Regression tests that make sure UTF-8 dictionaries are read correctly.<br/>
+ * Before morfologik 1.1.4 some words containing diacritics were returned
+ * incorrectly.
+ *
+ * @author Ionuț Păduraru
+ * @since 08.03.2009 19:25:50
+ */
+public class RomanianTaggerDiacriticsTest extends RomanianTaggerTestAbs {
+
+  /**
+   * Supplies a tagger that reads "/ro/test_diacritics.dict" instead of the
+   * default dictionary. According to the original author, that dictionary
+   * was built from a simple input file with these lines:
+   * <pre>
+   * cușcă cușcă 001
+   * cartea carte 000
+   * mergeam merge 001
+   * merseserăm merge 002
+   * cuțit cuțit 001
+   * cuțitul cuțit 002
+   * </pre>
+   * NOTE(review): {@link #testTaggerMerseseram()} also expects an entry for
+   * "făcusem", which is not part of this listing - confirm that the listing
+   * is complete.
+   *
+   * @author Ionuț Păduraru
+   * @since 08.03.2009 19:15:59
+   * @return a tagger backed by the diacritics test dictionary
+   */
+  @Override
+  protected RomanianTagger createTagger() {
+    return new RomanianTagger("/ro/test_diacritics.dict");
+  }
+
+  /**
+   * Before morfologik 1.1.4 the lemma returned for "merseserăm" was
+   * incorrect ("mege" instead of "merge"), although the same dictionary
+   * returned the correct lemma when queried from the command line
+   * (/fsa_morph -d ...).
+   *
+   * @author Ionuț Păduraru
+   * @since 08.03.2009 19:25:59
+   * @throws Exception if tagging fails
+   */
+  public void testTaggerMerseseram() throws Exception {
+    // all lookups below go against "test_diacritics.dict"
+    assertHasLemmaAndPos("făcusem", "face", "004");
+    assertHasLemmaAndPos("cuțitul", "cuțit", "002");
+    // the lemma is what matters here; the POS value is hard-coded in the
+    // test dictionary and carries no meaning
+    assertHasLemmaAndPos("merseserăm", "merge", "002");
+  }
+
+  /**
+   * Words with diacritics that were always returned correctly; they are
+   * checked to show that only some words were affected by the old problem.
+   *
+   * @author Ionuț Păduraru
+   * @since 24.03.2009 21:39:25
+   * @throws Exception if tagging fails
+   */
+  public void testTaggerCuscaCutit() throws Exception {
+    // all lookups below go against "test_diacritics.dict"
+    assertHasLemmaAndPos("cușcă", "cușcă", "001");
+    assertHasLemmaAndPos("cuțit", "cuțit", "001");
+    assertHasLemmaAndPos("cuțitul", "cuțit", "002");
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTest.java
new file mode 100644
index 0000000..05f37cc
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTest.java
@@ -0,0 +1,105 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ro;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.TestTools;
+
+/**
+ * Tests the Romanian tagger with its default dictionary, first on single
+ * words and then on whole input strings.
+ *
+ * @author Ionuț Păduraru
+ * @since 20.02.2009 19:36:32
+ */
+public class RomanianTaggerTest extends RomanianTaggerTestAbs {
+
+  /**
+   * Single-word check: "mergeam" must have readings of lemma "merge" for
+   * both tags asserted below.
+   *
+   * @author Ionuț Păduraru
+   * @since 20.02.2009 19:50:10
+   * @throws Exception if tagging fails
+   */
+  public void testTagger_Merge() throws Exception {
+    // merge - verb, indicative imperfect, first person, singular
+    assertHasLemmaAndPos("mergeam", "merge", "V0s1000ii0");
+    // merge - verb, indicative imperfect, first person, plural
+    assertHasLemmaAndPos("mergeam", "merge", "V0p1000ii0");
+  }
+
+  /**
+   * <code>merseserăm</code> used to come back with an incorrect lemma
+   * ("mege", with the "r" missing).
+   *
+   * @author Ionuț Păduraru
+   * @since 20.02.2009 20:24:55
+   * @throws Exception if tagging fails
+   */
+  public void testTagger_Merseseram() throws Exception {
+    // step 1: only the lemma has to match (a null POS means "ignore POS")
+    assertHasLemmaAndPos("merseserăm", "merge", null);
+    // step 2: lemma and POS have to match together
+    assertHasLemmaAndPos("merseserăm", "merge", "V0p1000im0");
+  }
+
+  /**
+   * A special word: a fi (to be) - eu sunt (I am) and ei sunt (they are)
+   * share the same inflected form.
+   *
+   * @author Ionuț Păduraru
+   * @since 20.02.2009 20:21:10
+   * @throws Exception if tagging fails
+   */
+  public void testTagger_Fi() throws Exception {
+    // fi - verb, indicative present, first person, singular
+    assertHasLemmaAndPos("sunt", "fi", "V0s1000izf");
+    // fi - verb, indicative present, third person, plural
+    assertHasLemmaAndPos("sunt", "fi", "V0p3000izf");
+  }
+
+  /**
+   * The big picture: checks the tagger output for a whole sentence.
+   *
+   * @author ionuț păduraru
+   * @since 20.02.2009 01:12:33
+   * @throws IOException declared by the helper being called
+   */
+  public void testTagger() throws IOException {
+    TestTools.myAssert(
+        "Cartea este frumoasă.",
+        "Cartea/[carte]Sfs3aac000 -- este/[fi]V0s3000izb -- frumoasă/[frumos]Afs3an0000",
+        getTokenizer(), getTagger());
+  }
+
+  /**
+   * Same word as {@link #testTagger_Merseseram()}, but compared through
+   * {@code TestTools.myAssert} against the full expected output string.
+   *
+   * @author ionuț păduraru
+   * @since 20.02.2009 01:44:50
+   * @throws IOException declared by the helper being called
+   */
+  public void testTaggerMerseseram() throws IOException {
+    TestTools.myAssert(
+        "merseserăm",
+        "merseserăm/[merge]V0p1000im0",
+        getTokenizer(), getTagger());
+  }
+
+  /**
+   * Runs this test class with the JUnit text runner.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args) {
+    junit.textui.TestRunner.run(RomanianTaggerTest.class);
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTestAbs.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTestAbs.java
new file mode 100644
index 0000000..ca5bdf9
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTestAbs.java
@@ -0,0 +1,147 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ro;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * <p>
+ * Common base class of the RomanianTagger tests.
+ * </p>
+ * <p>
+ * Owns the tagger and tokenizer under test and offers a helper that looks
+ * for a specific lemma/POS pair among the readings of a word.
+ * </p>
+ *
+ * @author Ionuț Păduraru
+ * @since 20.02.2009 19:36:32
+ */
+public abstract class RomanianTaggerTestAbs extends TestCase {
+
+  private RomanianTagger tagger;
+  private WordTokenizer tokenizer;
+
+  /**
+   * Obtains the tagger from {@link #createTagger()} and creates a new word
+   * tokenizer.
+   *
+   * @see junit.framework.TestCase#setUp()
+   */
+  public void setUp() {
+    tagger = createTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  /**
+   * Walks over every entry of the dictionary file named by the tagger and
+   * prints a warning to stderr for each entry whose tag is null or empty.
+   * Missing tags are only reported; they do not make the test fail.
+   *
+   * @throws IOException declared because the dictionary is read from a resource
+   */
+  public void testDictionary() throws IOException {
+    final DictionaryLookup lookup = new DictionaryLookup(
+        Dictionary.read(this.getClass().getResource(tagger.getFileName())));
+    for (WordData entry : lookup) {
+      final boolean tagged = entry.getTag() != null && entry.getTag().length() != 0;
+      if (tagged) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  /**
+   * Factory for the tagger under test. Override this if you need another
+   * dictionary (a dictionary based on another file).
+   *
+   * @author Ionuț Păduraru
+   * @since 08.03.2009 22:09:01
+   * @return a tagger created with the no-argument constructor
+   */
+  protected RomanianTagger createTagger() {
+    return new RomanianTagger();
+  }
+
+  /**
+   * Asserts that at least one reading of <code>inflected</code> has the
+   * given lemma and POS tag. A <code>null</code> lemma or tag matches any
+   * value. The search stops at the first matching reading; the failure
+   * message lists every reading inspected up to that point.
+   *
+   * @author Ionuț Păduraru
+   * @since 20.02.2009 19:17:54
+   * @param inflected
+   *          input word, inflected form
+   * @param lemma
+   *          expected lemma, or <code>null</code> to accept any lemma
+   * @param posTag
+   *          expected tag for the lemma, or <code>null</code> to accept any tag
+   * @throws IOException
+   *           declared by the tagger call
+   */
+  protected void assertHasLemmaAndPos(String inflected, String lemma,
+      String posTag) throws IOException {
+    final List<AnalyzedTokenReadings> readingsList = tagger.tag(createList(inflected));
+    final StringBuilder inspected = new StringBuilder();
+    boolean matched = false;
+    search:
+    for (AnalyzedTokenReadings readings : readingsList) {
+      final int count = readings.getReadingsLength();
+      for (int idx = 0; idx < count; idx++) {
+        final AnalyzedToken reading = readings.getAnalyzedToken(idx);
+        final String actualLemma = reading.getLemma();
+        final String actualPos = reading.getPOSTag();
+        inspected.append(String.format("[%s/%s]", actualLemma, actualPos));
+        if (acceptsValue(lemma, actualLemma) && acceptsValue(posTag, actualPos)) {
+          matched = true;
+          // first hit is enough: leave both loops
+          break search;
+        }
+      }
+    }
+    assertTrue(String.format("Lemma and POS not found for word [%s]! "
+        + "Expected [%s/%s]. Actual: %s", inflected, lemma, posTag,
+        inspected.toString()), matched);
+  }
+
+  /**
+   * Tells whether <code>actual</code> satisfies <code>expected</code>,
+   * where a <code>null</code> expectation accepts everything.
+   */
+  private static boolean acceptsValue(String expected, String actual) {
+    return expected == null || expected.equals(actual);
+  }
+
+  /**
+   * Copies the given words into a new list.
+   *
+   * @author Ionuț Păduraru
+   * @since 20.02.2009 19:13:57
+   * @param words
+   *          the words to put into the list
+   * @return a new list holding the words in the given order
+   */
+  private List<String> createList(String... words) {
+    final List<String> result = new ArrayList<String>();
+    for (int i = 0; i < words.length; i++) {
+      result.add(words[i]);
+    }
+    return result;
+  }
+
+  /** @return the tagger created in {@link #setUp()} */
+  public RomanianTagger getTagger() {
+    return tagger;
+  }
+
+  /** @return the tokenizer created in {@link #setUp()} */
+  public WordTokenizer getTokenizer() {
+    return tokenizer;
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ru/RussianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ru/RussianTaggerTest.java
new file mode 100644
index 0000000..3a555d1
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ru/RussianTaggerTest.java
@@ -0,0 +1,59 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ru;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link RussianTagger}: a scan of its dictionary for entries
+ * without a POS tag, and a comparison of tagged sample sentences with
+ * fixed expected output.
+ */
+public class RussianTaggerTest extends TestCase {
+
+  private RussianTagger tagger;
+  private WordTokenizer tokenizer;
+
+  /** Creates a fresh tagger and word tokenizer before each test. */
+  public void setUp() {
+    tagger = new RussianTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  /**
+   * Walks over every entry of the dictionary file named by the tagger and
+   * prints a warning to stderr for each entry whose tag is null or empty.
+   * Missing tags are only reported; they do not make the test fail.
+   *
+   * @throws IOException declared because the dictionary is read from a resource
+   */
+  public void testDictionary() throws IOException {
+    final DictionaryLookup lookup = new DictionaryLookup(
+        Dictionary.read(this.getClass().getResource(tagger.getFileName())));
+    for (WordData entry : lookup) {
+      final boolean tagged = entry.getTag() != null && entry.getTag().length() != 0;
+      if (tagged) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  /**
+   * Hands two sample sentences and their expected "token/[lemma]POS"
+   * strings to {@code TestTools.myAssert} together with the tokenizer and
+   * tagger. The second sentence ends in a word for which null lemma and tag
+   * are expected.
+   *
+   * @throws IOException declared by the helper being called
+   */
+  public void testTagger() throws IOException {
+    TestTools.myAssert(
+        "Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему.",
+        "Все/[весь]PADJ:PL:Nom|Все/[весь]PADJ:PL:V|Все/[все]ADV|Все/[все]PNN:PL:Nom|Все/[все]PNN:PL:V|Все/[все]PNN:Sin:Nom|Все/[все]PNN:Sin:V -- счастливые/[счастливый]ADJ:PL:Nom|счастливые/[счастливый]ADJ:PL:V -- семьи/[семья]NN:Fem:PL:Nom|семьи/[семья]NN:Fem:PL:V|семьи/[семья]NN:Fem:Sin:R -- похожи/[похожий]ADJ_Short:PL -- друг/[друг]NN:Masc:Sin:Nom -- на/[на]PREP -- друга/[друг]NN:Masc:Sin:R|друга/[друг]NN:Masc:Sin:V -- каждая/[каждый]PADJ:Fem:Nom -- несчастливая/[несчастливый]ADJ:Fem:Nom -- семья/[семья]NN:Fem:Sin:Nom -- несчастлива/[несчастливый]ADJ_Short:Fem -- по-своему/[по-своему]ADV",
+        tokenizer, tagger);
+    TestTools.myAssert(
+        "Все смешалось в доме Облонских.",
+        "Все/[весь]PADJ:PL:Nom|Все/[весь]PADJ:PL:V|Все/[все]ADV|Все/[все]PNN:PL:Nom|Все/[все]PNN:PL:V|Все/[все]PNN:Sin:Nom|Все/[все]PNN:Sin:V -- смешалось/[смешаться]VB:Past:Neut -- в/[в]PREP -- доме/[дом]NN:Masc:Sin:P -- Облонских/[null]null",
+        tokenizer, tagger);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sk/SlovakTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sk/SlovakTaggerTest.java
new file mode 100644
index 0000000..5949afc
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sk/SlovakTaggerTest.java
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.sk;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link SlovakTagger}: a scan of its dictionary for entries
+ * without a POS tag, and a comparison of tagged sample input with fixed
+ * expected output.
+ */
+public class SlovakTaggerTest extends TestCase {
+
+  private SlovakTagger tagger;
+  private WordTokenizer tokenizer;
+
+  /** Creates a fresh tagger and word tokenizer before each test. */
+  public void setUp() {
+    tagger = new SlovakTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  /**
+   * Walks over every entry of the dictionary file named by the tagger and
+   * prints a warning to stderr for each entry whose tag is null or empty.
+   * Missing tags are only reported; they do not make the test fail.
+   *
+   * @throws IOException declared because the dictionary is read from a resource
+   */
+  public void testDictionary() throws IOException {
+    final DictionaryLookup lookup = new DictionaryLookup(
+        Dictionary.read(this.getClass().getResource(tagger.getFileName())));
+    for (WordData entry : lookup) {
+      final boolean tagged = entry.getTag() != null && entry.getTag().length() != 0;
+      if (tagged) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() +" lacks a POS tag in the dictionary.");
+    }
+  }
+
+  /**
+   * Hands a sample sentence and a non-word, each with its expected
+   * "token/[lemma]POS" string, to {@code TestTools.myAssert} together with
+   * the tokenizer and tagger. For the non-word, null lemma and tag are
+   * expected.
+   *
+   * @throws IOException declared by the helper being called
+   */
+  public void testTagger() throws IOException {
+    TestTools.myAssert(
+        "Tu nájdete vybrané čísla a obsahy časopisu Kultúra slova.",
+        "Tu/[tu]J|Tu/[tu]PD|Tu/[tu]T -- nájdete/[nájsť]VKdpb+ -- vybrané/[vybraný]Gtfp1x|vybrané/[vybraný]Gtfp4x|vybrané/[vybraný]Gtfp5x|vybrané/[vybraný]Gtip1x|vybrané/[vybraný]Gtip4x|vybrané/[vybraný]Gtip5x|vybrané/[vybraný]Gtnp1x|vybrané/[vybraný]Gtnp4x|vybrané/[vybraný]Gtnp5x|vybrané/[vybraný]Gtns1x|vybrané/[vybraný]Gtns4x|vybrané/[vybraný]Gtns5x -- čísla/[číslo]SSnp1|čísla/[číslo]SSnp4|čísla/[číslo]SSnp5|čísla/[číslo]SSns2 -- a/[a]J|a/[a]O|a/[a]Q|a/[a]SUnp1|a/[a]SUnp2|a/[a]SUnp3|a/[a]SUnp4|a/[a]SUnp5|a/[a]SUnp6|a/[a]SUnp7|a/[a]SUns1|a/[a]SUns2|a/[a]SUns3|a/[a]SUns4|a/[a]SUns5|a/[a]SUns6|a/[a]SUns7|a/[a]T|a/[a]W|a/[as]W -- obsahy/[obsah]SSip1|obsahy/[obsah]SSip4|obsahy/[obsah]SSip5 -- časopisu/[časopis]SSis2|časopisu/[časopis]SSis3 -- Kultúra/[kultúra]SSfs1|Kultúra/[kultúra]SSfs5 -- slova/[slovo]SSns2",
+        tokenizer, tagger);
+    TestTools.myAssert(
+        "blabla",
+        "blabla/[null]null",
+        tokenizer, tagger);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sv/SwedishTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sv/SwedishTaggerTest.java
new file mode 100644
index 0000000..128d46a
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sv/SwedishTaggerTest.java
@@ -0,0 +1,60 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.sv;
+
+import java.io.IOException;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+/**
+ * Tests for {@link SwedishTagger}: a scan of its dictionary for entries
+ * without a POS tag, and a comparison of tagged sample sentences with
+ * fixed expected output.
+ */
+public class SwedishTaggerTest extends TestCase {
+
+  private SwedishTagger tagger;
+  private WordTokenizer tokenizer;
+
+  /** Creates a fresh tagger and word tokenizer before each test. */
+  public void setUp() {
+    tagger = new SwedishTagger();
+    tokenizer = new WordTokenizer();
+  }
+
+  /**
+   * Walks over every entry of the dictionary file named by the tagger and
+   * prints a warning to stderr for each entry whose tag is null or empty.
+   * Missing tags are only reported; they do not make the test fail.
+   *
+   * @throws IOException declared because the dictionary is read from a resource
+   */
+  public void testDictionary() throws IOException {
+    final DictionaryLookup lookup = new DictionaryLookup(
+        Dictionary.read(this.getClass().getResource(tagger.getFileName())));
+    for (WordData entry : lookup) {
+      final boolean tagged = entry.getTag() != null && entry.getTag().length() != 0;
+      if (tagged) {
+        continue;
+      }
+      System.err.println("**** Warning: the word " + entry.getWord() + "/" + entry.getStem() + " lacks a POS tag in the dictionary.");
+    }
+  }
+
+  /**
+   * Hands two sample sentences and their expected "token/[lemma]POS"
+   * strings to {@code TestTools.myAssert} together with the tokenizer and
+   * tagger.
+   *
+   * @throws IOException declared by the helper being called
+   */
+  public void testTagger() throws IOException {
+    TestTools.myAssert(
+        "Det är nog bäst att du får en klubba till",
+        "Det/[det]PN -- är/[vara]VB:PRS -- nog/[nog]AB -- bäst/[bra]JJ:S|bäst/[bäst]AB|bäst/[god]JJ:S -- att/[att]KN -- du/[du]PN -- får/[få]VB:PRS|får/[får]NN:OF:PLU:NOM:NEU|får/[får]NN:OF:SIN:NOM:NEU -- en/[en]NN:OF:SIN:NOM:UTR|en/[en]PN|en/[passant]en passant NN:OF:SIN:NOM:UTR|en/[passanten]en passant NN:BF:SIN:NOM:UTR|en/[passantens]en passant NN:BF:SIN:GEN:UTR|en/[passanter]en passant NN:OF:PLU:NOM:UTR|en/[passanterna]en passant NN:BF:PLU:NOM:UTR|en/[passanternas]en passant NN:BF:PLU:GEN:UTR|en/[passanters]en passant NN:OF:PLU:GEN:UTR|en/[passants]en passant NN:OF:SIN:GEN:UTR -- klubba/[klubba]NN:OF:SIN:NOM:UTR|klubba/[klubba]VB:IMP|klubba/[klubba]VB:INF -- till/[till]AB|till/[till]PP",
+        tokenizer, tagger);
+    TestTools.myAssert(
+        "Du menar sannolikt \"massera\" om du inte skriver om masarnas era förstås.",
+        "Du/[du]PN -- menar/[mena]VB:PRS -- sannolikt/[sannolik]JJ:PN|sannolikt/[sannolikt]AB -- massera/[massera]VB:IMP|massera/[massera]VB:INF -- om/[om]AB|om/[om]KN|om/[om]PP -- du/[du]PN -- inte/[inte]AB -- skriver/[skriva]VB:PRS -- om/[om]AB|om/[om]KN|om/[om]PP -- masarnas/[mas]NN:BF:PLU:GEN:UTR -- era/[era]NN:OF:SIN:NOM:UTR|era/[era]PN -- förstås/[förstå]VB:INF:PF|förstås/[förstå]VB:PRS:PF|förstås/[förstås]AB",
+        tokenizer, tagger);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/test-en.txt b/JLanguageTool/src/test/de/danielnaber/languagetool/test-en.txt
new file mode 100644
index 0000000..5e7c8fb
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/test-en.txt
@@ -0,0 +1 @@
+This is an test. \ No newline at end of file
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/RussianSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/RussianSRXSentenceTokenizerTest.java
new file mode 100644
index 0000000..6acf29c
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/RussianSRXSentenceTokenizerTest.java
@@ -0,0 +1,120 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+
+/*
+ * Russian SRX Sentence Tokenizer Test
+ * $Id$
+ *
+ * Feeds sentence fixtures to SRXSentenceTokenizer instances created for
+ * "ru" and checks the splitting through TestTools.testSplit. Each String[]
+ * is one fixture; its elements are the sentences the tokenizer is expected
+ * to produce. Most fixtures are German and English sentences; the last
+ * group exercises Russian abbreviations.
+ *
+ * NOTE(review): two assertions appear twice verbatim (the "ein(???)" case
+ * and the "He won't\n\n" case on stokenizer2) - they look like copy/paste
+ * leftovers; confirm before removing.
+ */
+
+
+public class RussianSRXSentenceTokenizerTest extends TestCase {
+
+ // accept \n as paragraph (single-line-break mode is switched on in setUp):
+ private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("ru");
+ // accept only \n\n as paragraph (single-line-break mode is switched off in setUp):
+ private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("ru");
+
+
+ public final void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+
+ public final void testTokenize() {
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dies ist ein Satz." });
+ testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz! ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz... ", "Noch einer." });
+ testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." });
+
+ testSplit(new String[] { "Heute ist der 13.12.2004." });
+ testSplit(new String[] { "Es geht am 24.09. los." });
+ testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." });
+
+ testSplit(new String[] { "Das ist,, also ob es bla." });
+ testSplit(new String[] { "Das ist es.. ", "So geht es weiter." });
+
+ testSplit(new String[] { "Das hier ist ein(!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(!!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(?) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+
+ // TODO: currently we do not distinguish whether the colon is followed
+ // by a complete sentence or not:
+ testSplit(new String[] { "Das war es: gar nichts." });
+ testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." });
+
+ // incomplete sentences, need to work for on-the-fly checking of texts:
+ testSplit(new String[] { "Here's a" });
+ testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" });
+
+ // Tests taken from LanguageTool's SentenceSplitterTest.py:
+ testSplit(new String[] { "This is a sentence. " });
+ testSplit(new String[] { "This is a sentence. ", "And this is another one." });
+ testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." });
+ testSplit(new String[] { "Don't split strings like U.S.A. either." });
+ testSplit(new String[] { "Don't split strings like U. S. A. either." });
+ testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." });
+ testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." });
+ testSplit(new String[] { "The \".\" should not be a delimiter in quotes." });
+ testSplit(new String[] { "\"Here he comes!\" she said." });
+ testSplit(new String[] { "\"Here he comes!\", she said." });
+ testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." });
+ testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." });
+ testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" });
+ // known to fail:
+ // testSplit(new String[]{"He won't. ", "Really."});
+ testSplit(new String[] { "He won't go. ", "Really." });
+ testSplit(new String[] { "He won't say no.", "Not really." });
+ testSplit(new String[] { "He won't say No.", "Not really." });
+ testSplit(new String[] { "This is it: a test." });
+ // one/two returns = paragraph = new sentence:
+ TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+ TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+ // Missing space after sentence end:
+ testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." });
+ // From the Russian abbreviation list:
+ testSplit(new String[] { "Отток капитала из России составил 7 млрд. долларов, сообщил министр финансов Алексей Кудрин." });
+ testSplit(new String[] { "Журнал издаётся с 1967 г., пользуется большой популярностью в мире." });
+ testSplit(new String[] { "С 2007 г. периодичность выхода газеты – 120 раз в год." });
+ testSplit(new String[] { "Редакция журнала находится в здании по адресу: г. Москва, 110000, улица Мира, д. 1." });
+ testSplit(new String[] { "Все эти вопросы заставляют нас искать ответы в нашей истории 60-80-х гг. прошлого столетия." });
+ testSplit(new String[] { "Более 300 тыс. документов и справочников." });
+ testSplit(new String[] { "Скидки до 50000 руб. на автомобили." });
+ testSplit(new String[] { "Изготовление визиток любыми тиражами (от 20 шт. до 10 тысяч) в минимальные сроки (от 20 минут)." });
+ }
+
+ public final void testSplit(final String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizerTest.java
new file mode 100644
index 0000000..7f3b76c
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizerTest.java
@@ -0,0 +1,108 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+
+/** Tests English sentence splitting with an SRXSentenceTokenizer created for "en".
+ * @author Daniel Naber
+ */
+public class SRXSentenceTokenizerTest extends TestCase {
+
+ // set up in setUp() to accept a single \n as paragraph:
+ private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("en");
+ // set up in setUp() to accept only \n\n as paragraph:
+ private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("en");
+ // Applies the two different line-break settings to the tokenizers.
+ public void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ public void testTokenize() {
+ // incomplete sentences, need to work for on-the-fly checking of texts:
+ testSplit(new String[] { "Here's a" });
+ testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" });
+
+ testSplit(new String[] { "This is a sentence. " });
+ testSplit(new String[] { "This is a sentence. ", "And this is another one." });
+ testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." });
+ testSplit(new String[] { "This is e.g. Mr. Smith, who talks slowly...",
+ "But this is another sentence." });
+ testSplit(new String[] { "Chanel no. 5 is blah." });
+ testSplit(new String[] { "Mrs. Jones gave Peter $4.5, to buy Chanel No 5.",
+ "He never came back." });
+ testSplit(new String[] { "On p. 6 there's nothing. ", "Another sentence." });
+ testSplit(new String[] { "Leave me alone!, he yelled. ", "Another sentence." });
+ testSplit(new String[] { "\"Leave me alone!\", he yelled." });
+ testSplit(new String[] { "'Leave me alone!', he yelled. ", "Another sentence." });
+ testSplit(new String[] { "'Leave me alone!,' he yelled. ", "Another sentence." });
+ testSplit(new String[] { "This works on the phrase level, i.e. not on the word level." });
+ testSplit(new String[] { "Let's meet at 5 p.m. in the main street." });
+ testSplit(new String[] { "James comes from the U.K. where he worked as a programmer." });
+ testSplit(new String[] { "Don't split strings like U.S.A. please." });
+ testSplit(new String[] { "Don't split strings like U. S. A. either." });
+ testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." });
+ testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." });
+ testSplit(new String[] { "The \".\" should not be a delimiter in quotes." });
+ testSplit(new String[] { "\"Here he comes!\" she said." });
+ testSplit(new String[] { "\"Here he comes!\", she said." });
+ testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." });
+ testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." });
+ testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" });
+ testSplit(new String[] { "The sentence (...) ends here." });
+ testSplit(new String[] { "The sentence [...] ends here." });
+ testSplit(new String[] { "The sentence ends here (...). ", "Another sentence." });
+ // these were previously known to fail, but they pass now:
+ testSplit(new String[]{"He won't. ", "Really."});
+ testSplit(new String[]{"He will not. ", "Really."});
+ testSplit(new String[] { "He won't go. ", "Really." });
+ testSplit(new String[] { "He won't say no.", "Not really." });
+ testSplit(new String[] { "He won't say No.", "Not really." });
+ testSplit(new String[] { "He won't say no. 5 is better. ", "Not really." });
+ testSplit(new String[] { "He won't say No. 5 is better. ", "Not really." });
+ testSplit(new String[] { "They met at 5 p.m. on Thursday." });
+ testSplit(new String[] { "They met at 5 p.m. ", "It was Thursday." });
+ testSplit(new String[] { "This is it: a test." });
+ // one or two line breaks = paragraph = new sentence, depending on the tokenizer's setting:
+ TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+ TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); // NOTE(review): repeats the check two lines above
+ TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+ // Missing space after sentence end:
+ testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." });
+ // From the abbreviation list:
+ testSplit(new String[] { "Jones Bros. have built a succesful company." });
+ // parentheses:
+ testSplit(new String[] { "It (really!) works." });
+ testSplit(new String[] { "It [really!] works." });
+ testSplit(new String[] { "It works (really!). ", "No doubt." });
+ testSplit(new String[] { "It works [really!]. ", "No doubt." });
+ testSplit(new String[] { "It really(!) works well." });
+ testSplit(new String[] { "It really[!] works well." });
+ }
+ // Hands the sentences to TestTools.testSplit together with stokenizer (the single-\n tokenizer).
+ private void testSplit(String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SentenceTokenizerTest.java
new file mode 100644
index 0000000..9d15429
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SentenceTokenizerTest.java
@@ -0,0 +1,107 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+
+/** Tests sentence splitting of English text with a default-constructed SentenceTokenizer.
+ * @author Daniel Naber
+ */
+public class SentenceTokenizerTest extends TestCase {
+
+ // set up in setUp() to accept a single \n as paragraph:
+ private SentenceTokenizer stokenizer = new SentenceTokenizer();
+ // set up in setUp() to accept only \n\n as paragraph:
+ private SentenceTokenizer stokenizer2 = new SentenceTokenizer();
+ // Applies the two different line-break settings to the tokenizers.
+ public void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ public void testTokenize() {
+ // incomplete sentences, need to work for on-the-fly checking of texts:
+ testSplit(new String[] { "Here's a" });
+ testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" });
+
+ testSplit(new String[] { "This is a sentence. " });
+ testSplit(new String[] { "This is a sentence. ", "And this is another one." });
+ testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." });
+ testSplit(new String[] { "This is e.g. Mr. Smith, who talks slowly...",
+ "But this is another sentence." });
+ testSplit(new String[] { "Chanel no. 5 is blah." });
+ testSplit(new String[] { "Mrs. Jones gave Peter $4.5, to buy Chanel No 5.",
+ "He never came back." });
+ testSplit(new String[] { "On p. 6 there's nothing. ", "Another sentence." });
+ testSplit(new String[] { "Leave me alone!, he yelled. ", "Another sentence." });
+ testSplit(new String[] { "\"Leave me alone!\", he yelled." });
+ testSplit(new String[] { "'Leave me alone!', he yelled. ", "Another sentence." });
+ testSplit(new String[] { "'Leave me alone!,' he yelled. ", "Another sentence." });
+ testSplit(new String[] { "This works on the phrase level, i.e. not on the word level." });
+ testSplit(new String[] { "Let's meet at 5 p.m. in the main street." });
+ testSplit(new String[] { "James comes from the U.K. where he worked as a programmer." });
+ testSplit(new String[] { "Don't split strings like U.S.A. please." });
+ testSplit(new String[] { "Don't split strings like U. S. A. either." });
+ testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." });
+ testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." });
+ testSplit(new String[] { "The \".\" should not be a delimiter in quotes." });
+ testSplit(new String[] { "\"Here he comes!\" she said." });
+ testSplit(new String[] { "\"Here he comes!\", she said." });
+ testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." });
+ testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." });
+ testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" });
+ testSplit(new String[] { "The sentence (...) ends here." });
+ testSplit(new String[] { "The sentence [...] ends here." });
+ testSplit(new String[] { "The sentence ends here (...). ", "Another sentence." });
+ // TODO: the following case is known to fail, so it is disabled:
+ // testSplit(new String[]{"He won't. ", "Really."});
+ testSplit(new String[]{"He will not. ", "Really."});
+ testSplit(new String[] { "He won't go. ", "Really." });
+ testSplit(new String[] { "He won't say no.", "Not really." });
+ testSplit(new String[] { "He won't say No.", "Not really." });
+ testSplit(new String[] { "He won't say no. 5 is better. ", "Not really." });
+ testSplit(new String[] { "He won't say No. 5 is better. ", "Not really." });
+ testSplit(new String[] { "They met at 5 p.m. on Thursday." });
+ testSplit(new String[] { "They met at 5 p.m. ", "It was Thursday." });
+ testSplit(new String[] { "This is it: a test." });
+ // one or two line breaks = paragraph = new sentence, depending on the tokenizer's setting:
+ TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+ TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ // Missing space after sentence end:
+ testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." });
+ // From the abbreviation list:
+ testSplit(new String[] { "Jones Bros. have built a succesful company." });
+ // parentheses:
+ testSplit(new String[] { "It (really!) works." });
+ testSplit(new String[] { "It [really!] works." });
+ testSplit(new String[] { "It works (really!). ", "No doubt." });
+ testSplit(new String[] { "It works [really!]. ", "No doubt." });
+ testSplit(new String[] { "It really(!) works well." });
+ testSplit(new String[] { "It really[!] works well." });
+ }
+ // Hands the sentences to TestTools.testSplit together with stokenizer (the single-\n tokenizer).
+ private void testSplit(String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/WordTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/WordTokenizerTest.java
new file mode 100644
index 0000000..675dfb0
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/WordTokenizerTest.java
@@ -0,0 +1,38 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers;
+
+import junit.framework.TestCase;
+
+import java.util.List;
+
+public class WordTokenizerTest extends TestCase {
+  /** Checks that words and the delimiters between them are returned as separate tokens. */
+  public void testTokenize() {
+    final WordTokenizer tokenizer = new WordTokenizer();
+    List<String> tokens = tokenizer.tokenize("This is\u00A0a test");
+    assertEquals(7, tokens.size()); // JUnit argument order is (expected, actual)
+    // a space token printed between two ", " separators shows up as two consecutive spaces:
+    assertEquals("[This,  , is, \u00A0, a,  , test]", tokens.toString());
+    tokens = tokenizer.tokenize("This\rbreaks");
+    assertEquals(3, tokens.size());
+    assertEquals("[This, \r, breaks]", tokens.toString());
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizerTest.java
new file mode 100644
index 0000000..eb6d17a
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizerTest.java
@@ -0,0 +1,118 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.cs;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+public class CzechSentenceTokenizerTest extends TestCase {
+ // Sentence-splitting tests for CzechSentenceTokenizer; besides Czech text the fixtures include German and English sentences.
+ // set up in setUp() to accept a single \n as paragraph:
+ private SentenceTokenizer stokenizer = new CzechSentenceTokenizer();
+
+ // set up in setUp() to accept only \n\n as paragraph:
+ private SentenceTokenizer stokenizer2 = new CzechSentenceTokenizer();
+ // Applies the two different line-break settings to the tokenizers.
+ public final void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+
+ public final void testTokenize() {
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dies ist ein Satz." });
+ testSplit(new String[] { "Tři sta třicet tři stříbrných křepelek přeletělo přes stři sta třicet tři stříbrných střech." });
+ testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz! ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz... ", "Noch einer." });
+ testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." });
+
+ testSplit(new String[] { "Heute ist der 13.12.2004." });
+ testSplit(new String[] { "Dnes je 16.3.2007." });
+ testSplit(new String[] { "Tohle je 1. verze testu českého tokenizeru." });
+ testSplit(new String[] { "Es geht am 24.09. los." });
+ testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." });
+
+ testSplit(new String[] { "Das ist,, also ob es bla." });
+ testSplit(new String[] { "Das ist es.. ", "So geht es weiter." });
+
+ testSplit(new String[] { "Das hier ist ein(!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(!!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(?) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." }); // NOTE(review): identical to the previous line
+
+ testSplit(new String[] {
+ "„Česká sazba se oproti okolnímu světu v některých aspektech mírně liší”. ", "Bylo řečeno." });
+ testSplit(new String[] { "„Jeď nejrychleji jak můžeš”, řekla mu tiše." });
+
+ // TODO: currently we do not distinguish whether the colon is followed
+ // by a complete sentence or not:
+ testSplit(new String[] { "Das war es: gar nichts." });
+ testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." });
+
+ // incomplete sentences, need to work for on-the-fly checking of texts:
+ testSplit(new String[] { "Here's a" });
+ testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" });
+
+ // Tests taken from LanguageTool's SentenceSplitterTest.py:
+ testSplit(new String[] { "This is a sentence. " });
+ testSplit(new String[] { "This is a sentence. ", "And this is another one." });
+ testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." });
+
+ testSplit(new String[] { "Don't split strings like U. S. A. either." });
+ testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." });
+ testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." });
+ testSplit(new String[] { "The \".\" should not be a delimiter in quotes." });
+ testSplit(new String[] { "\"Here he comes!\" she said." });
+ testSplit(new String[] { "\"Here he comes!\", she said." });
+ testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." });
+ testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." });
+ testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" });
+ // known to fail, therefore disabled:
+ // testSplit(new String[]{"He won't. ", "Really."});
+ testSplit(new String[] { "He won't go. ", "Really." });
+ testSplit(new String[] { "He won't say no.", "Not really." });
+ testSplit(new String[] { "He won't say No.", "Not really." });
+ testSplit(new String[] { "This is it: a test." });
+ // one or two line breaks = paragraph = new sentence, depending on the tokenizer's setting:
+ TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+ TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); // NOTE(review): repeats the check two lines above
+ TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+ // Missing space after sentence end:
+ testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." });
+ // From the abbreviation list:
+ testSplit(new String[] { "V češtině jsou zkr. i pro jazyky, např. angl., maď. a jiné." });
+ testSplit(new String[] { "Titul jako doc. RNDr. Adam Řezník, Ph.D. se může vyskytnout." });
+ testSplit(new String[] { "Starověký Egypt vznikl okolo r. 3150 př.n.l. (anebo 3150 př.kr.). ",
+ "A zanikl v r. 31 př.kr." });
+ }
+ // Hands the sentences to TestTools.testSplit together with stokenizer (the single-\n tokenizer).
+ private final void testSplit(final String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/da/DanishSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/da/DanishSRXSentenceTokenizerTest.java
new file mode 100644
index 0000000..3151ed7
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/da/DanishSRXSentenceTokenizerTest.java
@@ -0,0 +1,82 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Esben Aaberg
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.da;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+/** Tests Danish sentence splitting with an SRXSentenceTokenizer created for "da".
+ * @author Esben Aaberg
+ */
+public class DanishSRXSentenceTokenizerTest extends TestCase {
+
+ // set up in setUp() to accept a single \n as paragraph:
+ private final SRXSentenceTokenizer stokenizer = new SRXSentenceTokenizer("da");
+ // Enables the single-line-break setting on the tokenizer.
+ public void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ }
+
+ public void testTokenize() {
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dette er en sætning." });
+ testSplit(new String[] { "Dette er en sætning. ", "Her er den næste." });
+ testSplit(new String[] { "En sætning! ", "Yderlige en." });
+ testSplit(new String[] { "En sætning... ", "Yderlige en." });
+ testSplit(new String[] { "På hjemmesiden http://www.stavekontrolden.dk bygger vi stavekontrollen." });
+ testSplit(new String[] { "Den 31.12. går ikke!" });
+ testSplit(new String[] { "Den 3.12.2011 går ikke!" });
+ testSplit(new String[] { "I det 18. og tidlige 19. århundrede hentede amerikansk kunst det meste af sin inspiration fra Europa." });
+
+ testSplit(new String[] { "Hendes Majestæt Dronning Margrethe II (Margrethe Alexandrine Þórhildur Ingrid, Danmarks dronning) (født 16. april 1940 på Amalienborg Slot) er siden 14. januar 1972 Danmarks regent." });
+ testSplit(new String[] { "Hun har residensbolig i Christian IX's Palæ på Amalienborg Slot." });
+ testSplit(new String[] { "Tronfølgeren ledte herefter statsrådsmøderne under Kong Frederik 9.'s fravær." });
+ testSplit(new String[] { "Marie Hvidt, Frederik IV - En letsindig alvorsmand, Gads Forlag, 2004." });
+ testSplit(new String[] { "Da vi første gang besøgte Restaurant Chr. IV, var vi de eneste gæster." });
+
+ testSplit(new String[] { "I dag er det den 25.12.2010." });
+ testSplit(new String[] { "I dag er det d. 25.12.2010." });
+ testSplit(new String[] { "I dag er den 13. december." });
+ testSplit(new String[] { "Arrangementet starter ca. 17:30 i dag." });
+ testSplit(new String[] { "Arrangementet starter ca. 17:30." });
+ testSplit(new String[] { "Det er nævnt i punkt 3.6.4 Rygbelastende helkropsvibrationer." });
+
+ testSplit(new String[] { "Rent praktisk er det også lettest lige at mødes, så der kan udveksles nøgler og brugsanvisninger etc." });
+ testSplit(new String[] { "Andre partier incl. borgerlige partier har deres særlige problemer: nogle samarbejder med apartheidstyret i Sydafrika, med NATO-landet Tyrkiet etc., men det skal så sandelig ikke begrunde en SF-offensiv for et samarbejde med et parti." });
+
+ testSplit(new String[] { "Hvad nu,, den bliver også." });
+ testSplit(new String[] { "Det her er det.. ", "Og her fortsætter det." });
+
+ testSplit(new String[] { "Dette er en(!) sætning." });
+ testSplit(new String[] { "Dette er en(!!) sætning." });
+ testSplit(new String[] { "Dette er en(?) sætning." });
+ testSplit(new String[] { "Dette er en(??) sætning." });
+ testSplit(new String[] { "Dette er en(???) sætning." });
+ testSplit(new String[] { "Militær værnepligt blev indført (traktaten krævede, at den tyske hær ikke oversteg 100.000 mand)." });
+
+ testSplit(new String[] { "Siden illustrerede hun \"Historierne om Regnar Lodbrog\" 1979 og \"Bjarkemål\" 1982 samt Poul Ørums \"Komedie i Florens\" 1990." });
+ }
+ // Hands the sentences to TestTools.testSplit together with stokenizer.
+ public void testSplit(String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSRXSentenceTokenizerTest.java
new file mode 100644
index 0000000..179662d
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSRXSentenceTokenizerTest.java
@@ -0,0 +1,108 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.de;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+/** Tests German sentence splitting with an SRXSentenceTokenizer created for "de".
+ * @author Daniel Naber
+ */
+public class GermanSRXSentenceTokenizerTest extends TestCase {
+
+ // set up in setUp() to accept a single \n as paragraph:
+ private SRXSentenceTokenizer stokenizer = new SRXSentenceTokenizer("de");
+ // set up in setUp() to accept only \n\n as paragraph; NOTE(review): no test in this class uses it:
+ private SRXSentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("de");
+ // Applies the two different line-break settings to the tokenizers.
+ public void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+
+ public void testTokenize() {
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dies ist ein Satz." });
+ testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz! ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz... ", "Noch einer." });
+ testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." });
+ testSplit(new String[] { "Natürliche Vererbungsprozesse prägten sich erst im 18. und frühen 19. Jahrhundert aus." });
+
+ testSplit(new String[] { "Friedrich I., auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich II., auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich IIXC., auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich II. öfter auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich VII. öfter auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich X. öfter auch bekannt als Friedrich der Zehnte." });
+
+ testSplit(new String[] { "Heute ist der 13.12.2004." });
+ testSplit(new String[] { "Heute ist der 13. Dezember." });
+ testSplit(new String[] { "Heute ist der 1. Januar." });
+ testSplit(new String[] { "Es geht am 24.09. los." });
+ testSplit(new String[] { "Es geht um ca. 17:00 los." });
+ testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." });
+
+ testSplit(new String[] { "Diese Periode begann im 13. Jahrhundert und damit bla." });
+ testSplit(new String[] { "Diese Periode begann im 13. oder 14. Jahrhundert und damit bla." });
+ testSplit(new String[] { "Diese Periode datiert auf das 13. bis zum 14. Jahrhundert und damit bla." });
+
+ testSplit(new String[] { "Das gilt lt. aktuellem Plan." });
+ testSplit(new String[] { "Orangen, Äpfel etc. werden gekauft." });
+
+ testSplit(new String[] { "Das ist,, also ob es bla." });
+ testSplit(new String[] { "Das ist es.. ", "So geht es weiter." });
+
+ testSplit(new String[] { "Das hier ist ein(!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(!!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(?) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." }); // NOTE(review): identical to the previous line
+
+ testSplit(new String[] { "»Der Papagei ist grün.« ", "Das kam so." });
+ testSplit(new String[] { "»Der Papagei ist grün«, sagte er" });
+
+ // TODO: currently we do not distinguish whether the colon is followed
+ // by a complete sentence or not:
+ testSplit(new String[] { "Das war es: gar nichts." });
+ testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." });
+
+ // Tests created as part of regression testing of SRX tokenizer.
+ // They come from Schuld und Sühne (Crime and Punishment) book.
+ testSplit(new String[] { "schlug er die Richtung nach der K … brücke ein. " });
+ testSplit(new String[] { "sobald ich es von einem Freunde zurückbekomme …« Er wurde verlegen und schwieg." });
+ // testSplit(new String[] { "Verstehen Sie wohl? ", "… ", "Gestatten Sie mir noch die Frage" });
+ testSplit(new String[] { "Er kannte eine Unmenge Quellen, aus denen er schöpfen konnte, d. h. natürlich, wo er durch Arbeit sich etwas verdienen konnte." });
+ testSplit(new String[] { "Stimme am lautesten heraustönte …. ", "Sobald er auf der Straße war" });
+// testSplit(new String[] { "Aber nein doch, er hörte alles nur zu deutlich! ", "\n", "… ", "›Also, wenn's so ist" });
+ testSplit(new String[] { "»Welche Wohnung?\" ", "»Die, wo wir arbeiten." });
+ testSplit(new String[] { "»Nun also, wie ist's?« fragte Lushin und blickte sie fest an." });
+// testSplit(new String[] { "gezeigt hat.« ", "… ", "Hm! " });
+ }
+ // Hands the sentences to TestTools.testSplit together with stokenizer (the single-\n tokenizer).
+ public void testSplit(String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizerTest.java
new file mode 100644
index 0000000..6033df3
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizerTest.java
@@ -0,0 +1,100 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.de;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+
+/**
+ * @author Daniel Naber
+ */
+public class GermanSentenceTokenizerTest extends TestCase {
+ // tokenizer built with the no-argument constructor (no extra abbreviations passed in):
+ private GermanSentenceTokenizer sTokenizer = new GermanSentenceTokenizer();
+ // accept "foo" as an abbreviation:
+ private GermanSentenceTokenizer sTokenizerWithFoo = new GermanSentenceTokenizer(new String[]{"foo"});
+ // only sTokenizer is reconfigured here; sTokenizerWithFoo is not touched by setUp:
+ public void setUp() {
+ sTokenizer.setSingleLineBreaksMarksParagraph(true);
+ }
+ // each call lists the sentence pieces for one sample text (presumably joined and re-split by TestTools.testSplit - confirm there):
+ public void testTokenize() {
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dies ist ein Satz." });
+ testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz! ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz... ", "Noch einer." });
+ testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." });
+ testSplit(new String[] { "Natürliche Vererbungsprozesse prägten sich erst im 18. und frühen 19. Jahrhundert aus." });
+
+ testSplit(new String[] { "Friedrich I., auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich II., auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich IIXC., auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich II. öfter auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich VII. öfter auch bekannt als Friedrich der Große." });
+ testSplit(new String[] { "Friedrich X. öfter auch bekannt als Friedrich der Zehnte." });
+
+ testSplit(new String[] { "Heute ist der 13.12.2004." });
+ testSplit(new String[] { "Heute ist der 13. Dezember." });
+ testSplit(new String[] { "Heute ist der 1. Januar." });
+ testSplit(new String[] { "Es geht am 24.09. los." });
+ testSplit(new String[] { "Es geht um ca. 17:00 los." });
+ testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." });
+
+ testSplit(new String[] { "Diese Periode begann im 13. Jahrhundert und damit bla." });
+ testSplit(new String[] { "Diese Periode begann im 13. oder 14. Jahrhundert und damit bla." });
+ testSplit(new String[] { "Diese Periode datiert auf das 13. bis zum 14. Jahrhundert und damit bla." });
+
+ testSplit(new String[] { "Das gilt lt. aktuellem Plan." });
+ testSplit(new String[] { "Orangen, Äpfel etc. werden gekauft." });
+
+ testSplit(new String[] { "Das ist,, also ob es bla." });
+ testSplit(new String[] { "Das ist es.. ", "So geht es weiter." });
+
+ testSplit(new String[] { "Das hier ist ein(!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(!!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(?) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+
+ testSplit(new String[] { "»Der Papagei ist grün.« ", "Das kam so." });
+ testSplit(new String[] { "»Der Papagei ist grün«, sagte er" });
+
+ // incorrect sentences:
+ testSplit(new String[] { "Dies ist ein Satz. ", " und der nächste fängt klein an - das ist falsch." });
+
+ // TODO: currently we do not distinguish whether a complete sentence
+ // follows the colon or not:
+ testSplit(new String[] { "Das war es: gar nichts." });
+ testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." });
+
+ // test adding own list of abbreviations:
+ TestTools.testSplit(new String[] { "Hier ist foo. ", "keine Abk. im Text." }, sTokenizer);
+ TestTools.testSplit(new String[] { "Hier ist foo. eine Abk. im Text." }, sTokenizerWithFoo);
+ }
+ // shortcut used above: forwards the array to TestTools.testSplit with sTokenizer (never sTokenizerWithFoo):
+ private void testSplit(String[] sentences) {
+ TestTools.testSplit(sentences, sTokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchSRXSentenceTokenizerTest.java
new file mode 100644
index 0000000..4ef4e78
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchSRXSentenceTokenizerTest.java
@@ -0,0 +1,83 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.nl;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+/**
+ * @author Daniel Naber
+ * @author Adapted by R. Baars for Dutch *
+ */
+public class DutchSRXSentenceTokenizerTest extends TestCase {
+ // SRX-based sentence tokenizer created with the language code "nl":
+ private SRXSentenceTokenizer stokenizer = new SRXSentenceTokenizer("nl");
+ // runs before each test: switches setSingleLineBreaksMarksParagraph to true on the tokenizer
+ public void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ }
+ // each call lists the sentence pieces for one Dutch sample text (presumably joined and re-split by TestTools.testSplit - confirm there):
+ public void testTokenize() {
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dit is een zin." });
+ testSplit(new String[] { "Dit is een zin. ", "Nog een." });
+ testSplit(new String[] { "Een zin! ", "Nog een." });
+ testSplit(new String[] { "Een zin... ", "Nog een." });
+ testSplit(new String[] { "Op http://www.test.de vind je een website." });
+ testSplit(new String[] { "De brief is op 3.10 gedateerd." });
+ testSplit(new String[] { "De brief is op 31.1 gedateerd." });
+ testSplit(new String[] { "De breif is op 3.10.2000 gedateerd." });
+
+ testSplit(new String[] { "Vandaag is het 13.12.2004." });
+ testSplit(new String[] { "Op 24.09 begint het." });
+ testSplit(new String[] { "Om 17:00 begint het." });
+ testSplit(new String[] { "In paragraaf 3.9.1 is dat beschreven." });
+
+ testSplit(new String[] { "Januari jl. is dat vastgelegd." });
+ testSplit(new String[] { "Appel en pruimen enz. werden gekocht." });
+ testSplit(new String[] { "De afkorting n.v.t. betekent niet van toepassing." });
+
+ testSplit(new String[] { "Bla et al. blah blah." });
+
+ testSplit(new String[] { "Dat is,, of het is bla." });
+ testSplit(new String[] { "Dat is het.. ", "Zo gaat het verder." });
+
+ testSplit(new String[] { "Dit hier is een(!) zin." });
+ testSplit(new String[] { "Dit hier is een(!!) zin." });
+ testSplit(new String[] { "Dit hier is een(?) zin." });
+ testSplit(new String[] { "Dit hier is een(???) zin." });
+ testSplit(new String[] { "Dit hier is een(???) zin." });
+
+ testSplit(new String[] { "»De papagaai is groen.« ", "Dat was hij al." });
+ testSplit(new String[] { "»De papagaai is groen«, zei hij." });
+
+ testSplit(new String[] {"Als voetballer wordt hij nooit een prof. ", "Maar prof. N.A.W. Th.Ch. Janssen wordt dat wel."});
+
+ // TODO: a sentence following a colon
+ testSplit(new String[] { "Dat was het: helemaal niets." });
+ testSplit(new String[] { "Dat was het: het is een nieuwe zin." });
+ }
+ // shortcut used above: forwards the array to TestTools.testSplit with stokenizer
+ private void testSplit(String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizerTest.java
new file mode 100644
index 0000000..be2aab1
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizerTest.java
@@ -0,0 +1,38 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.nl;
+
+import junit.framework.TestCase;
+
+import java.util.List;
+
+public class DutchWordTokenizerTest extends TestCase {
+ // Checks DutchWordTokenizer.tokenize() on a no-break space (\u00A0) and on apostrophes; assertEquals takes (expected, actual).
+ public void testTokenize() {
+ DutchWordTokenizer w = new DutchWordTokenizer();
+ List<String> testList = w.tokenize("This is\u00A0a test");
+ assertEquals(7, testList.size());
+ assertEquals("[This, , is, \u00A0, a, , test]", testList.toString());
+ testList = w.tokenize("Bla bla oma's bla bla 'test");
+ assertEquals(12, testList.size());
+ assertEquals("[Bla, , bla, , oma's, , bla, , bla, , ', test]",
+ testList.toString());
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/pl/PolishSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/pl/PolishSentenceTokenizerTest.java
new file mode 100644
index 0000000..3fa11f5
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/pl/PolishSentenceTokenizerTest.java
@@ -0,0 +1,152 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.pl;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+public class PolishSentenceTokenizerTest extends TestCase {
+ // Both tokenizers are SRXSentenceTokenizer instances for "pl"; they differ only in the line-break setting applied in setUp().
+ // accept \n as paragraph:
+ private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("pl");
+ // accept only \n\n as paragraph:
+ private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("pl");
+ // applies the two paragraph modes described on the fields above:
+ public final void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+ // plain testSplit(...) calls go through stokenizer2; calls that need stokenizer use TestTools.testSplit directly:
+ public final void testTokenize() {
+
+ testSplit(new String[] { "This is a sentence. " });
+
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dies ist ein Satz." });
+ testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz! ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz... ", "Noch einer." });
+ testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." });
+ testSplit(new String[] { "To się wydarzyło 3.10.2000 i mam na to dowody." });
+
+ testSplit(new String[] { "To było 13.12 - nikt nie zapomni tego przemówienia." });
+ testSplit(new String[] { "Heute ist der 13.12.2004." });
+ testSplit(new String[] { "To jest np. ten debil spod jedynki." });
+ testSplit(new String[] { "To jest 1. wydanie." });
+ testSplit(new String[] { "Dziś jest 13. rocznica powstania wąchockiego." });
+
+ testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." });
+
+ testSplit(new String[] { "To jest tzw. premier." });
+ testSplit(new String[] { "Jarek kupił sobie kurteczkę, tj. strój Marka." });
+
+ testSplit(new String[] { "Das ist,, also ob es bla." });
+ testSplit(new String[] { "Das ist es.. ", "So geht es weiter." });
+
+ testSplit(new String[] { "Das hier ist ein(!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(!!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(?) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+
+ testSplit(new String[] { "„Prezydent jest niemądry”. ", "Tak wyszło." });
+ testSplit(new String[] { "„Prezydent jest niemądry”, powiedział premier" });
+
+ // TODO: currently we do not distinguish whether a complete sentence
+ // follows the colon or not:
+ testSplit(new String[] { "Das war es: gar nichts." });
+ testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." });
+
+ // incomplete sentences, need to work for on-the-fly checking of texts:
+ testSplit(new String[] { "Here's a" });
+ testSplit(new String[] { "Here's a sentence. ",
+ "And here's one that's not comp" });
+
+ // Tests taken from LanguageTool's SentenceSplitterTest.py:
+ testSplit(new String[] { "This is a sentence. " });
+ testSplit(new String[] { "This is a sentence. ", "And this is another one." });
+ testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." });
+
+ testSplit(new String[] { "Don't split strings like U. S. A. either." });
+ testSplit(new String[] { "Don't split strings like U.S.A. either." });
+ testSplit(new String[] { "Don't split... ", "Well you know. ",
+ "Here comes more text." });
+ testSplit(new String[] { "Don't split... well you know. ",
+ "Here comes more text." });
+ testSplit(new String[] { "The \".\" should not be a delimiter in quotes." });
+ testSplit(new String[] { "\"Here he comes!\" she said." });
+ testSplit(new String[] { "\"Here he comes!\", she said." });
+ testSplit(new String[] { "\"Here he comes.\" ",
+ "But this is another sentence." });
+ testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." });
+ testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" });
+ // known to fail:
+ // testSplit(new String[]{"He won't. ", "Really."});
+ testSplit(new String[] { "He won't go. ", "Really." });
+ testSplit(new String[] { "He won't say no.", "Not really." });
+ testSplit(new String[] { "This is it: a test." });
+ // one/two returns = paragraph = new sentence:
+ TestTools
+ .testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+ TestTools
+ .testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+ // Missing space after sentence end:
+ testSplit(new String[] { "James is from the Ireland!",
+ "He lives in Spain now." });
+
+ // from user bug reports:
+ testSplit(new String[] { "Temperatura wody w systemie wynosi 30°C.",
+ "W skład obiegu otwartego wchodzi zbiornik i armatura." });
+ testSplit(new String[] { "Zabudowano kolumny o długości 45 m. ",
+ "Woda z ujęcia jest dostarczana do zakładu." });
+
+ // two-letter initials:
+ testSplit(new String[] { "Najlepszym polskim reżyserem był St. Różewicz. ", "Chodzi o brata wielkiego poety." });
+
+ // From the abbreviation list:
+ testSplit(new String[] { "Ks. Jankowski jest prof. teologii." });
+ testSplit(new String[] { "To wydarzyło się w 1939 r.",
+ "To był burzliwy rok." });
+ testSplit(new String[] { "Prezydent jest popierany przez 20 proc. społeczeństwa." });
+ testSplit(new String[] {
+ "Moje wystąpienie ma na celu zmobilizowanie zarządu partii do działań, które umożliwią uzyskanie 40 proc.",
+ "Nie widzę dziś na scenie politycznej formacji, która lepiej by łączyła różne poglądy" });
+ testSplit(new String[] { "To jest zmienna A.", "Zaś to jest zmienna B." });
+ // SKROTY_BEZ_KROPKI in ENDABREVLIST
+ testSplit(new String[] { "Mam już 20 mln.", "To powinno mi wystarczyć" });
+ testSplit(new String[] { "Mam już 20 mln. buraków." });
+ // ellipsis
+ testSplit(new String[] { "Rytmem tej wiecznie przemijającej światowej egzystencji […] rytmem mesjańskiej natury jest szczęście." });
+ // sic!
+ testSplit(new String[] { "W gazecie napisali, że pasy (sic!) pogryzły człowieka." });
+ // Numbers with dots.
+ testSplit(new String[] { "Mam w magazynie dwie skrzynie LMD20. ", "Jestem żołnierzem i wiem, jak można ich użyć"});
+ }
+ // shortcut used above: always tokenizes with stokenizer2 (only \n\n counts as a paragraph)
+ private final void testSplit(final String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer2);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianSentenceTokenizerTest.java
new file mode 100644
index 0000000..7e94ac7
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianSentenceTokenizerTest.java
@@ -0,0 +1,157 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.ro;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ *
+ * @author Ionuț Păduraru
+ * @since 07.05.2009 10:28:59
+ *
+ */
+public class RomanianSentenceTokenizerTest extends TestCase {
+ // Both tokenizers are SRXSentenceTokenizer instances for "ro"; they differ only in the line-break setting applied in setUp().
+ // accept \n as paragraph:
+ private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("ro");
+ // accept only \n\n as paragraph:
+ private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("ro");
+ // applies the two paragraph modes described on the fields above:
+ public final void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+ // plain testSplit(...) calls go through stokenizer2; calls that need stokenizer use TestTools.testSplit directly:
+ public final void testTokenize() {
+
+ testSplit(new String[] { "Aceasta este o propozitie fara diacritice. " });
+ testSplit(new String[] { "Aceasta este o fraza fara diacritice. ",
+ "Propozitia a doua, tot fara diacritice. " });
+ testSplit(new String[] { "Aceasta este o propoziție cu diacritice. " });
+ testSplit(new String[] { "Aceasta este o propoziție cu diacritice. ",
+ "Propoziția a doua, cu diacritice. " });
+
+ testSplit(new String[] { "O propoziție! ", "Și încă o propoziție. "});
+ testSplit(new String[] { "O propoziție... ", "Și încă o propoziție. "});
+ testSplit(new String[] { "La adresa http://www.archeus.ro găsiți resurse lingvistice. "});
+ testSplit(new String[] { "Data de 10.02.2009 nu trebuie să fie separator de propoziții. "});
+ testSplit(new String[] { "Astăzi suntem în data de 07.05.2007. "});
+ testSplit(new String[] { "Astăzi suntem în data de 07/05/2007. "});
+ testSplit(new String[] { "La anumărul (1) avem puține informații. "});
+ testSplit(new String[] { "To jest 1. wydanie." });
+ testSplit(new String[] { "La anumărul 1. avem puține informații. "});
+ testSplit(new String[] { "La anumărul 13. avem puține informații. "});
+ testSplit(new String[] { "La anumărul 1.3.3 avem puține informații. "});
+
+ testSplit(new String[] { "O singură propoziție... "});
+ testSplit(new String[] { "Colegii mei s-au dus... "});
+ testSplit(new String[] { "O singură propoziție!!! "});
+ testSplit(new String[] { "O singură propoziție??? "});
+
+ testSplit(new String[] { "Propoziții: una și alta. "});
+
+ testSplit(new String[] { "Domnu' a plecat. "});
+ testSplit(new String[] { "Profu' de istorie tre' să predea lecția. "});
+ testSplit(new String[] { "Sal'tare! "});
+ testSplit(new String[] { "'Neaţa! "});
+ testSplit(new String[] { "Deodat'apare un urs. "});
+ // accents
+ testSplit(new String[] { "A făcut două cópii. "});
+ testSplit(new String[] { "Ionel adúnă acum ceea ce Maria aduná înainte să vin eu. "});
+
+ // incomplete sentences, need to work for on-the-fly checking of texts:
+ testSplit(new String[] { "Domnu' a plecat" });
+ testSplit(new String[] { "Domnu' a plecat. ",
+ "El nu a plecat" });
+
+ testSplit(new String[] { "Se pot întâlni și abrevieri precum S.U.A. " +
+ "sau B.C.R. într-o singură propoziție." });
+ testSplit(new String[] { "Se pot întâlni și abrevieri precum S.U.A. sau B.C.R. ",
+ "Aici sunt două propoziții." });
+ testSplit(new String[] { "Același lucru aici... ", "Aici sunt două propoziții." });
+ testSplit(new String[] { "Același lucru aici... dar cu o singură propoziție." });
+
+ testSplit(new String[] { "„O propoziție!” ", "O alta." });
+ testSplit(new String[] { "„O propoziție!!!” ", "O alta." });
+ testSplit(new String[] { "„O propoziție?” ", "O alta." });
+ testSplit(new String[] { "„O propoziție?!?” ", "O alta." });
+ testSplit(new String[] { "«O propoziție!» ", "O alta." });
+ testSplit(new String[] { "«O propoziție!!!» ", "O alta." });
+ testSplit(new String[] { "«O propoziție?» ", "O alta." });
+ testSplit(new String[] { "«O propoziție???» ", "O alta." });
+ testSplit(new String[] { "«O propoziție?!?» ", "O alta." });
+ testSplit(new String[] { "O primă propoziție. ", "(O alta.)" });
+
+ testSplit(new String[] { "A venit domnu' Vasile. " });
+ testSplit(new String[] { "A venit domnu' acela. " });
+
+ // one/two returns = paragraph = new sentence:
+ TestTools.testSplit(new String[] { "A venit domnul\n\n", "Vasile." }, stokenizer2);
+ TestTools.testSplit(new String[] { "A venit domnul\n", "Vasile." }, stokenizer);
+ TestTools.testSplit(new String[] { "A venit domnu'\n\n", "Vasile." }, stokenizer2);
+ TestTools.testSplit(new String[] { "A venit domnu'\n", "Vasile." }, stokenizer);
+ // Missing space after sentence end:
+ testSplit(new String[] { "El este din România!",
+ "Acum e plecat cu afaceri." });
+
+ testSplit(new String[] { "Temperatura este de 30°C.", "Este destul de cald." });
+ testSplit(new String[] { "A alergat 50 m. ",
+ "Deja a obosit." });
+
+ // From the abbreviation list:
+ testSplit(new String[] { "Pentru dvs. vom face o excepție." });
+ testSplit(new String[] { "Pt. dumneavoastră vom face o excepție." });
+ testSplit(new String[] { "Pt. dvs. vom face o excepție." });
+ // d.p.d.v. / dpdv. = "din punct de vedere" (from the point of view)
+ testSplit(new String[] { "A expus problema d.p.d.v. artistic." });
+ testSplit(new String[] { "A expus problema dpdv. artistic." });
+ // ș.a.m.d. / șamd. = "și așa mai departe" (and so on)
+ testSplit(new String[] { "Are mere, pere, șamd. dar nu are alune." });
+ testSplit(new String[] { "Are mere, pere, ș.a.m.d. dar nu are alune." });
+ testSplit(new String[] { "Are mere, pere, ș.a.m.d. ", "În schimb, nu are alune." });
+ // ş.c.l. = "şi celelalte" (and the others)
+ testSplit(new String[] { "Are mere, pere, ş.c.l. dar nu are alune." });
+ testSplit(new String[] { "Are mere, pere, ş.c.l. ", "Nu are alune." });
+ // etc. = et cetera
+ testSplit(new String[] { "Are mere, pere, etc. dar nu are alune." });
+ testSplit(new String[] { "Are mere, pere, etc. ", "Nu are alune." });
+ // ş.a. = "şi altele" (and others)
+ testSplit(new String[] { "Are mere, pere, ș.a. dar nu are alune." });
+ // M.Ap.N. = "Ministerul Apărării Nationale" (Ministry of National Defence)
+ // there are 2 rules for this in segment.srx. Can this be done with only one rule?
+ testSplit(new String[] { "A fost și la M.Ap.N. dar nu l-au primit. " });
+ testSplit(new String[] { "A fost și la M.Ap.N. ", "Nu l-au primit. " });
+
+ // sic!
+ testSplit(new String[] { "Apo' da' tulai (sic!) că mult mai e de mers." });
+ testSplit(new String[] { "Apo' da' tulai(sic!) că mult mai e de mers." });
+
+ // […]
+ testSplit(new String[] { "Aici este o frază […] mult prescurtată." });
+ testSplit(new String[] { "Aici este o frază [...] mult prescurtată." });
+ }
+ // shortcut used above: always tokenizes with stokenizer2 (only \n\n counts as a paragraph)
+ private final void testSplit(final String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer2);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizerTest.java
new file mode 100644
index 0000000..055a0ee
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizerTest.java
@@ -0,0 +1,122 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.ro;
+
+import java.util.List;
+
+import junit.framework.TestCase;
+
+public class RomanianWordTokenizerTest extends TestCase {
+ // Checks RomanianWordTokenizer.tokenize() token by token; every assertEquals is written as (expected, actual).
+ public void testTokenize() {
+ // basic test - simple words, no diacritics
+ RomanianWordTokenizer w = new RomanianWordTokenizer();
+ List<String> testList = w.tokenize("Aceaste mese sunt bune");
+ assertEquals(7, testList.size());
+ assertEquals("[Aceaste, , mese, , sunt, , bune]", testList.toString());
+
+ // basic test - simple words, with diacritics
+ testList = w.tokenize("Această carte este frumoasă");
+ assertEquals(7, testList.size());
+ assertEquals("[Această, , carte, , este, , frumoasă]", testList.toString());
+
+ // test for "-"
+ testList = w.tokenize("nu-ți doresc");
+ assertEquals(5, testList.size());
+ assertEquals("[nu, -, ți, , doresc]",
+ testList.toString());
+
+ // test for "„"
+ testList = w.tokenize("zicea „merge");
+ assertEquals(4, testList.size());
+ assertEquals("[zicea, , „, merge]",
+ testList.toString());
+
+ // test for "„" with white space
+ testList = w.tokenize("zicea „ merge");
+ assertEquals(5, testList.size());
+ assertEquals("[zicea, , „, , merge]",
+ testList.toString());
+
+ // test for "”"
+ testList = w.tokenize("zicea merge”");
+ assertEquals(4, testList.size());
+ assertEquals("[zicea, , merge, ”]",
+ testList.toString());
+
+ // test for "„" and "”"
+ testList = w.tokenize("zicea „merge bine”");
+ assertEquals(7, testList.size());
+ assertEquals("[zicea, , „, merge, , bine, ”]",
+ testList.toString());
+
+ //ți-am
+ testList = w.tokenize("ți-am");
+ assertEquals(3, testList.size());
+ assertEquals("[ți, -, am]",
+ testList.toString());
+
+ // test for "«" and "»"
+ testList = w.tokenize("zicea «merge bine»");
+ assertEquals(7, testList.size());
+ assertEquals("[zicea, , «, merge, , bine, »]",
+ testList.toString());
+ // test for "<" and ">"
+ testList = w.tokenize("zicea <<merge bine>>");
+ assertEquals(9, testList.size());
+ assertEquals("[zicea, , <, <, merge, , bine, >, >]",
+ testList.toString());
+ // test for "%"
+ testList = w.tokenize("avea 15% apă");
+ assertEquals(6, testList.size());
+ assertEquals("[avea, , 15, %, , apă]",
+ testList.toString());
+ // test for "°"
+ testList = w.tokenize("are 30°C");
+ assertEquals(5, testList.size());
+ assertEquals("[are, , 30, °, C]",
+ testList.toString());
+ // test for "="
+ testList = w.tokenize("fructe=mere");
+ assertEquals(3, testList.size());
+ assertEquals("[fructe, =, mere]",
+ testList.toString());
+ // test for "|"
+ testList = w.tokenize("pere|mere");
+ assertEquals(3, testList.size());
+ assertEquals("[pere, |, mere]",
+ testList.toString());
+ // test for "\n"
+ testList = w.tokenize("pere\nmere");
+ assertEquals(3, testList.size());
+ assertEquals("[pere, \n, mere]",
+ testList.toString());
+ // test for "\r"
+ testList = w.tokenize("pere\rmere");
+ assertEquals(3, testList.size());
+ assertEquals("[pere, \r, mere]",
+ testList.toString());
+ // test for "\n\r"
+ testList = w.tokenize("pere\n\rmere");
+ assertEquals(4, testList.size());
+ assertEquals("[pere, \n, \r, mere]",
+ testList.toString());
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/sk/SlovakSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/sk/SlovakSentenceTokenizerTest.java
new file mode 100644
index 0000000..cc7d101
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/sk/SlovakSentenceTokenizerTest.java
@@ -0,0 +1,143 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.sk;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+public class SlovakSentenceTokenizerTest extends TestCase {
+
+ // accept \n as paragraph:
+ private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("sk");
+ // accept only \n\n as paragraph:
+ private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("sk");
+
+ public final void setUp() {
+ stokenizer.setSingleLineBreaksMarksParagraph(true);
+ stokenizer2.setSingleLineBreaksMarksParagraph(false);
+ }
+
+ public final void testTokenize() {
+
+ testSplit(new String[] { "This is a sentence. " });
+
+ // NOTE: sentences here need to end with a space character so they
+ // have correct whitespace when appended:
+ testSplit(new String[] { "Dies ist ein Satz." });
+ testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz! ", "Noch einer." });
+ testSplit(new String[] { "Ein Satz... ", "Noch einer." });
+ testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." });
+
+ testSplit(new String[] { "Das ist,, also ob es bla." });
+ testSplit(new String[] { "Das ist es.. ", "So geht es weiter." });
+
+ testSplit(new String[] { "Das hier ist ein(!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(!!) Satz." });
+ testSplit(new String[] { "Das hier ist ein(?) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+ testSplit(new String[] { "Das hier ist ein(???) Satz." });
+
+ // TODO: currently we do not distinguish whether a complete sentence
+ // follows the colon or not:
+ testSplit(new String[] { "Das war es: gar nichts." });
+ testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." });
+
+ // incomplete sentences, need to work for on-the-fly checking of texts:
+ testSplit(new String[] { "Here's a" });
+ testSplit(new String[] { "Here's a sentence. ",
+ "And here's one that's not comp" });
+
+ testSplit(new String[] { "„Prezydent jest niemądry”. ", "Tak wyszło." });
+ testSplit(new String[] { "„Prezydent jest niemądry”, powiedział premier" });
+
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." });
+ testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." });
+ testSplit(new String[] { "Toto 2. vydanie bolo rozobrané za 1,5 roka." });
+ testSplit(new String[] { "Festival Bažant Pohoda slávi svoje 10. výročie." });
+ testSplit(new String[] { "Dlho odkladané parlamentné voľby v Angole sa uskutočnia 5. septembra." });
+ testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." });
+
+ // From the abbreviation list:
+ testSplit(new String[] { "Aké sú skutočné príčiny tzv. transformačných príznakov?" });
+ testSplit(new String[] { "Aké príplatky zamestnancovi (napr. za nadčas) stanovuje Zákonník práce?" });
+ testSplit(new String[] { "Počas neprítomnosti zastupuje MUDr. Marianna Krupšová." });
+ testSplit(new String[] { "Staroveký Egypt vznikol okolo r. 3150 p.n.l. (tzn. 3150 pred Kr.). ",
+ "A zanikol v r. 31 pr. Kr." });
+
+ // from user bug reports:
+ testSplit(new String[] { "Temperatura wody w systemie wynosi 30°C.",
+ "W skład obiegu otwartego wchodzi zbiornik i armatura." });
+ testSplit(new String[] { "Zabudowano kolumny o długości 45 m. ",
+ "Woda z ujęcia jest dostarczana do zakładu." });
+
+ // two-letter initials:
+ testSplit(new String[] { "Najlepszym polskim reżyserem był St. Różewicz. ", "Chodzi o brata wielkiego poety." });
+ testSplit(new String[] { "Nore M. hrozí za podvod 10 až 15 rokov." });
+ testSplit(new String[] { "To jest zmienna A.", "Zaś to jest zmienna B." });
+ // Numbers with dots.
+ testSplit(new String[] { "Mam w magazynie dwie skrzynie LMD20. ", "Jestem żołnierzem i wiem, jak można ich użyć"});
+ // ellipsis
+ testSplit(new String[] { "Rytmem tej wiecznie przemijającej światowej egzystencji […] rytmem mesjańskiej natury jest szczęście." });
+
+
+ // Tests taken from LanguageTool's SentenceSplitterTest.py:
+ testSplit(new String[] { "This is a sentence. " });
+ testSplit(new String[] { "This is a sentence. ", "And this is another one." });
+ testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." });
+
+ testSplit(new String[] { "Don't split strings like U. S. A. either." });
+ testSplit(new String[] { "Don't split strings like U.S.A. either." });
+ testSplit(new String[] { "Don't split... ", "Well you know. ",
+ "Here comes more text." });
+ testSplit(new String[] { "Don't split... well you know. ",
+ "Here comes more text." });
+ testSplit(new String[] { "The \".\" should not be a delimiter in quotes." });
+ testSplit(new String[] { "\"Here he comes!\" she said." });
+ testSplit(new String[] { "\"Here he comes!\", she said." });
+ testSplit(new String[] { "\"Here he comes.\" ",
+ "But this is another sentence." });
+ testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." });
+ testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" });
+ // known to fail:
+ // testSplit(new String[]{"He won't. ", "Really."});
+ testSplit(new String[] { "He won't go. ", "Really." });
+ testSplit(new String[] { "He won't say no.", "Not really." });
+ testSplit(new String[] { "This is it: a test." });
+ // one/two returns = paragraph = new sentence:
+ TestTools
+ .testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+ TestTools
+ .testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+ TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+ // Missing space after sentence end:
+ testSplit(new String[] { "James is from the Ireland!",
+ "He lives in Spain now." });
+ }
+
+ private final void testSplit(final String[] sentences) {
+ TestTools.testSplit(sentences, stokenizer2);
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tools/StringToolsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/StringToolsTest.java
new file mode 100644
index 0000000..8e24005
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/StringToolsTest.java
@@ -0,0 +1,263 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tools;
+
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.en.AvsAnRule;
+import junit.framework.TestCase;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.danielnaber.languagetool.Language;
+
+/**
+ * @author Daniel Naber
+ */
+public class StringToolsTest extends TestCase {
+
+ public void testAssureSet() {
+ String s = "";
+ try {
+ StringTools.assureSet(s, "varName");
+ fail();
+ } catch (IllegalArgumentException e) {
+ // expected exception
+ }
+ s = " \t";
+ try {
+ StringTools.assureSet(s, "varName");
+ fail();
+ } catch (IllegalArgumentException e) {
+ // expected exception
+ }
+ s = null;
+ try {
+ StringTools.assureSet(s, "varName");
+ fail();
+ } catch (NullPointerException e) {
+ // expected exception
+ }
+ s = "foo";
+ StringTools.assureSet(s, "varName");
+ }
+
+ public void testReadFile() throws IOException {
+ final String content = StringTools.readFile(new FileInputStream("src/test/testinput.txt"), "utf-8");
+ assertEquals("one\ntwo\nöäüß\n", content);
+ }
+
+ public void testIsAllUppercase() {
+ assertTrue(StringTools.isAllUppercase("A"));
+ assertTrue(StringTools.isAllUppercase("ABC"));
+ assertTrue(StringTools.isAllUppercase("ASV-EDR"));
+ assertTrue(StringTools.isAllUppercase("ASV-ÖÄÜ"));
+ assertTrue(StringTools.isAllUppercase(""));
+
+ assertFalse(StringTools.isAllUppercase("ß"));
+ assertFalse(StringTools.isAllUppercase("AAAAAAAAAAAAq"));
+ assertFalse(StringTools.isAllUppercase("a"));
+ assertFalse(StringTools.isAllUppercase("abc"));
+ }
+
+ public void testIsMixedCase() {
+ assertTrue(StringTools.isMixedCase("AbC"));
+ assertTrue(StringTools.isMixedCase("MixedCase"));
+ assertTrue(StringTools.isMixedCase("iPod"));
+ assertTrue(StringTools.isMixedCase("AbCdE"));
+
+ assertFalse(StringTools.isMixedCase(""));
+ assertFalse(StringTools.isMixedCase("ABC"));
+ assertFalse(StringTools.isMixedCase("abc"));
+ assertFalse(StringTools.isMixedCase("!"));
+ assertFalse(StringTools.isMixedCase("Word"));
+ }
+
+ public void testIsCapitalizedWord() {
+ assertTrue(StringTools.isCapitalizedWord("Abc"));
+ assertTrue(StringTools.isCapitalizedWord("Uppercase"));
+ assertTrue(StringTools.isCapitalizedWord("Ipod"));
+
+ assertFalse(StringTools.isCapitalizedWord(""));
+ assertFalse(StringTools.isCapitalizedWord("ABC"));
+ assertFalse(StringTools.isCapitalizedWord("abc"));
+ assertFalse(StringTools.isCapitalizedWord("!"));
+ assertFalse(StringTools.isCapitalizedWord("wOrD"));
+ }
+
+ public void testStartsWithUppercase() {
+ assertTrue(StringTools.startsWithUppercase("A"));
+ assertTrue(StringTools.startsWithUppercase("ÄÖ"));
+
+ assertFalse(StringTools.startsWithUppercase(""));
+ assertFalse(StringTools.startsWithUppercase("ß"));
+ assertFalse(StringTools.startsWithUppercase("-"));
+ }
+
+ public void testUppercaseFirstChar() {
+ assertEquals("", StringTools.uppercaseFirstChar(""));
+ assertEquals("A", StringTools.uppercaseFirstChar("A"));
+ assertEquals("Öäü", StringTools.uppercaseFirstChar("öäü"));
+ assertEquals("ßa", StringTools.uppercaseFirstChar("ßa"));
+ assertEquals("'Test'", StringTools.uppercaseFirstChar("'test'"));
+ assertEquals("''Test", StringTools.uppercaseFirstChar("''test"));
+ assertEquals("''T", StringTools.uppercaseFirstChar("''t"));
+ assertEquals("'''", StringTools.uppercaseFirstChar("'''"));
+ }
+
+ public void testLowercaseFirstChar() {
+ assertEquals("", StringTools.lowercaseFirstChar(""));
+ assertEquals("a", StringTools.lowercaseFirstChar("A"));
+ assertEquals("öäü", StringTools.lowercaseFirstChar("Öäü"));
+ assertEquals("ßa", StringTools.lowercaseFirstChar("ßa"));
+ assertEquals("'test'", StringTools.lowercaseFirstChar("'Test'"));
+ assertEquals("''test", StringTools.lowercaseFirstChar("''Test"));
+ assertEquals("''t", StringTools.lowercaseFirstChar("''T"));
+ assertEquals("'''", StringTools.lowercaseFirstChar("'''"));
+ }
+
+ public void testReaderToString() throws IOException {
+ final String str = StringTools.readerToString(new StringReader("bla\nöäü"));
+ assertEquals("bla\nöäü", str);
+ final StringBuilder longStr = new StringBuilder();
+ for (int i = 0; i < 4000; i++) {
+ longStr.append("x");
+ }
+ longStr.append("1234567");
+ assertEquals(4007, longStr.length());
+ final String str2 = StringTools.readerToString(new StringReader(longStr.toString()));
+ assertEquals(longStr.toString(), str2);
+ }
+
+ public void testEscapeXMLandHTML() {
+ assertEquals("!ä&quot;&lt;&gt;&amp;&amp;", StringTools.escapeXML("!ä\"<>&&"));
+ assertEquals("!ä&quot;&lt;&gt;&amp;&amp;", StringTools.escapeHTML("!ä\"<>&&"));
+ }
+
+ public void testRuleMatchesToXML() throws IOException {
+ final List<RuleMatch> matches = new ArrayList<RuleMatch>();
+ final String text = "This is an test sentence. Here's another sentence with more text.";
+ final RuleMatch match = new RuleMatch(new AvsAnRule(null), 8, 10, "myMessage");
+ match.setColumn(99);
+ match.setEndColumn(100);
+ match.setLine(44);
+ match.setEndLine(45);
+ matches.add(match);
+ final String xml = StringTools.ruleMatchesToXML(matches, text, 5, StringTools.XmlPrintMode.NORMAL_XML);
+ assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
+ "<matches>\n" +
+ "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" replacements=\"\" context=\"...s is an test...\" contextoffset=\"8\" errorlength=\"2\"/>\n" +
+ "</matches>\n", xml);
+ }
+
+ public void testListToString() {
+ final List<String> list = new ArrayList<String>();
+ list.add("foo");
+ list.add("bar");
+ list.add(",");
+ assertEquals("foo,bar,,", StringTools.listToString(list, ","));
+ assertEquals("foo\tbar\t,", StringTools.listToString(list, "\t"));
+ }
+
+ public void testGetContext() {
+ final String input = "This is a test sentence. Here's another sentence with more text.";
+ final String result = StringTools.getContext(8, 14, input, 5);
+ assertEquals("...s is a test sent...\n ^^^^^^ ", result);
+ }
+
+ public void testTrimWhitespace() {
+ try {
+ assertEquals(null, StringTools.trimWhitespace(null));
+ fail();
+ } catch (NullPointerException e) {
+ // expected
+ }
+ assertEquals("", StringTools.trimWhitespace(""));
+ assertEquals("", StringTools.trimWhitespace(" "));
+ assertEquals("XXY", StringTools.trimWhitespace(" \nXX\t Y"));
+ // TODO: make this work assertEquals("XXY", StringTools.trimWhitespace(" \r\nXX\t Y"));
+ assertEquals("word", StringTools.trimWhitespace("word"));
+ }
+
+ public void testAddSpace() {
+ assertEquals(" ", StringTools.addSpace("word", Language.ENGLISH));
+ assertEquals("", StringTools.addSpace(",", Language.ENGLISH));
+ assertEquals("", StringTools.addSpace(",", Language.FRENCH));
+ assertEquals("", StringTools.addSpace(",", Language.ENGLISH));
+ assertEquals(" ", StringTools.addSpace(":", Language.FRENCH));
+ assertEquals("", StringTools.addSpace(",", Language.ENGLISH));
+ assertEquals(" ", StringTools.addSpace(";", Language.FRENCH));
+ }
+
+ public void testGetLabel() {
+ assertEquals("This is a Label", StringTools.getLabel("This is a &Label"));
+ assertEquals("Bits & Pieces", StringTools.getLabel("Bits && Pieces"));
+ }
+
+ public void testGetOOoLabel() {
+ assertEquals("This is a ~Label", StringTools.getOOoLabel("This is a &Label"));
+ assertEquals("Bits & Pieces", StringTools.getLabel("Bits && Pieces")); // NOTE(review): this calls getLabel, not getOOoLabel, so the "&&" case of getOOoLabel is not exercised here - confirm whether that is intended
+ }
+
+ public void testGetMnemonic() {
+ assertEquals('F', StringTools.getMnemonic("&File"));
+ assertEquals('O', StringTools.getMnemonic("&OK"));
+ assertEquals('\u0000',
+ StringTools.getMnemonic("File && String operations"));
+ assertEquals('O',
+ StringTools.getMnemonic("File && String &Operations"));
+ }
+
+ public void testIsWhitespace() {
+ assertEquals(true, StringTools.isWhitespace(" "));
+ assertEquals(true, StringTools.isWhitespace("\t"));
+ assertEquals(true, StringTools.isWhitespace("\u2002"));
+ // a non-breaking space is expected not to count as whitespace
+ assertEquals(false, StringTools.isWhitespace("\u00a0"));
+ assertEquals(false, StringTools.isWhitespace("abc"));
+ // non-breaking OOo field
+ assertEquals(false, StringTools.isWhitespace("\\u02")); // NOTE(review): the doubled backslash makes this a literal four-character string (backslash, u, 0, 2), not a control character - confirm that is intended
+ assertEquals(false, StringTools.isWhitespace("\u0001"));
+ }
+
+ public void testIsPositiveNumber() {
+ assertEquals(true, StringTools.isPositiveNumber('3'));
+ assertEquals(false, StringTools.isPositiveNumber('a'));
+ }
+
+ public void testIsEmpty() {
+ assertEquals(true, StringTools.isEmpty(""));
+ assertEquals(true, StringTools.isEmpty(null));
+ assertEquals(false, StringTools.isEmpty("a"));
+ }
+
+ public void testFilterXML() {
+ assertEquals("test", StringTools.filterXML("test"));
+ assertEquals("<<test>>", StringTools.filterXML("<<test>>"));
+ assertEquals("test", StringTools.filterXML("<b>test</b>"));
+ assertEquals("A sentence with a test", StringTools.filterXML("A sentence with a <em>test</em>"));
+ }
+
+ public void testAsString() { // covers the null and the non-null argument of asString()
+ assertNull(StringTools.asString(null));
+ assertEquals("foo!", StringTools.asString("foo!")); // previously compared the literal with itself and never called asString()
+ }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tools/ToolsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/ToolsTest.java
new file mode 100644
index 0000000..91c47ac
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/ToolsTest.java
@@ -0,0 +1,105 @@
+package de.danielnaber.languagetool.tools;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.List;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.bitext.BitextRule;
+
+public class ToolsTest extends TestCase {
+
+ private ByteArrayOutputStream out;
+ private ByteArrayOutputStream err;
+ private PrintStream stdout;
+ private PrintStream stderr;
+
+ public void setUp() throws Exception {
+ super.setUp();
+ this.stdout = System.out;
+ this.stderr = System.err;
+ this.out = new ByteArrayOutputStream();
+ this.err = new ByteArrayOutputStream();
+ System.setOut(new PrintStream(this.out));
+ System.setErr(new PrintStream(this.err));
+ }
+
+ public void tearDown() throws Exception { // undoes setUp() in reverse order
+ System.setOut(this.stdout); // restore the real streams first so they come back even if super.tearDown() throws
+ System.setErr(this.stderr);
+ super.tearDown();
+ }
+
+ public void testCheck() throws IOException, ParserConfigurationException, SAXException {
+ final JLanguageTool polishTool = new JLanguageTool(Language.POLISH);
+ polishTool.activateDefaultPatternRules();
+ polishTool.activateDefaultFalseFriendRules();
+
+ final int cleanMatches = Tools.checkText("To jest całkowicie prawidłowe zdanie.", polishTool); // sentence expected to yield no matches
+ final String cleanOutput = new String(this.out.toByteArray());
+ assertEquals(0, cleanOutput.indexOf("Time:")); // the captured System.out must begin with "Time:"
+ assertEquals(0, cleanMatches);
+
+ final int repeatMatches = Tools.checkText("To jest jest problem.", polishTool); // doubled word
+ final String repeatOutput = new String(this.out.toByteArray()); // the buffer is not reset, so it still holds the first run's output
+ assertTrue(repeatOutput.contains("Rule ID: WORD_REPEAT_RULE"));
+ assertEquals(1, repeatMatches);
+ }
+
+ public void testCorrect() throws IOException, ParserConfigurationException, SAXException {
+ JLanguageTool tool = new JLanguageTool(Language.POLISH);
+ tool.activateDefaultPatternRules();
+ tool.activateDefaultFalseFriendRules();
+
+ String correct = Tools.correctText("To jest całkowicie prawidłowe zdanie.", tool);
+ assertEquals("To jest całkowicie prawidłowe zdanie.", correct);
+ correct = Tools.correctText("To jest jest problem.", tool);
+ assertEquals("To jest problem.", correct);
+
+ // more sentences, need to apply more suggestions > 1 in subsequent sentences
+ correct = Tools.correctText("To jest jest problem. Ale to już już nie jest problem.", tool);
+ assertEquals("To jest problem. Ale to już nie jest problem.", correct);
+ correct = Tools.correctText("To jest jest problem. Ale to już już nie jest problem. Tak sie nie robi. W tym zdaniu brakuje przecinka bo go zapomniałem.", tool);
+ assertEquals("To jest problem. Ale to już nie jest problem. Tak się nie robi. W tym zdaniu brakuje przecinka, bo go zapomniałem.", correct);
+
+ //now English
+ tool = new JLanguageTool(Language.ENGLISH);
+ tool.activateDefaultPatternRules();
+ tool.activateDefaultFalseFriendRules();
+
+ assertEquals("This is a test.", Tools.correctText("This is an test.", tool));
+
+ }
+
+ public void testBitextCheck() throws IOException, ParserConfigurationException, SAXException {
+ final JLanguageTool srcTool = new JLanguageTool(Language.ENGLISH);
+ final JLanguageTool trgTool = new JLanguageTool(Language.POLISH);
+ trgTool.activateDefaultPatternRules();
+
+ final List<BitextRule> rules = Tools.getBitextRules(Language.ENGLISH, Language.POLISH);
+
+ int matches = Tools.checkBitext(
+ "This is a perfectly good sentence.",
+ "To jest całkowicie prawidłowe zdanie.", srcTool, trgTool, rules,
+ false, StringTools.XmlPrintMode.NORMAL_XML);
+ String output = new String(this.out.toByteArray());
+ assertTrue(output.indexOf("Time:") == 0);
+ assertEquals(0, matches);
+
+ matches = Tools.checkBitext(
+ "This is not actual.",
+ "To nie jest aktualne.",
+ srcTool, trgTool,
+ rules, false, StringTools.XmlPrintMode.NORMAL_XML);
+ output = new String(this.out.toByteArray());
+ assertTrue(output.indexOf("Rule ID: ACTUAL") != -1);
+ assertEquals(1, matches);
+ }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tools/UnsyncStackTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/UnsyncStackTest.java
new file mode 100644
index 0000000..8aa79c2
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/UnsyncStackTest.java
@@ -0,0 +1,39 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tools;
+
+import junit.framework.TestCase;
+
+public class UnsyncStackTest extends TestCase { // push/peek/pop round trip on UnsyncStack
+
+ public UnsyncStackTest(String name) { // JUnit 3 style constructor taking the test name
+ super(name);
+ }
+
+ public void testStack() {
+ UnsyncStack<String> strings = new UnsyncStack<String>();
+ assertTrue(strings.empty()); // a fresh stack reports empty
+ strings.push("test");
+ assertEquals("test", strings.peek());
+ assertFalse(strings.empty()); // still non-empty after peek()
+ assertEquals("test", strings.pop());
+ assertTrue(strings.empty()); // pop() removed the only element
+ }
+}
diff --git a/JLanguageTool/src/test/testinput.txt b/JLanguageTool/src/test/testinput.txt
new file mode 100644
index 0000000..a0d7d78
--- /dev/null
+++ b/JLanguageTool/src/test/testinput.txt
@@ -0,0 +1,3 @@
+one
+two
+öäüß