diff options
Diffstat (limited to 'JLanguageTool/src/test')
106 files changed, 10216 insertions, 0 deletions
diff --git a/JLanguageTool/src/test/.cvsignore b/JLanguageTool/src/test/.cvsignore new file mode 100644 index 0000000..fbb76d7 --- /dev/null +++ b/JLanguageTool/src/test/.cvsignore @@ -0,0 +1 @@ +*.probescript diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/AbstractSecurityTestCase.java b/JLanguageTool/src/test/de/danielnaber/languagetool/AbstractSecurityTestCase.java new file mode 100644 index 0000000..5d0661d --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/AbstractSecurityTestCase.java @@ -0,0 +1,81 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + + +package de.danielnaber.languagetool; + +import junit.framework.TestCase; +import java.security.Permission; + +/** + * @author Charlie Collins (Maven Test Example from + * http://www.screaming-penguin.com/node/7570) + */ + +public class AbstractSecurityTestCase extends TestCase { + + public AbstractSecurityTestCase(String name) { + super(name); + } + + protected static class ExitException extends SecurityException { + private static final long serialVersionUID = 1L; + public final int status; + public ExitException(int status) { + super("There is no escape!"); + this.status = status; + } + } + + private static class NoExitSecurityManager extends SecurityManager { + @Override + public void checkPermission(@SuppressWarnings("unused") Permission perm) { + // allow anything. + } + + @Override + @SuppressWarnings("unused") + public void checkPermission(Permission perm, Object context) { + // allow anything. 
+ } + + @Override + public void checkExit(int status) { + super.checkExit(status); + throw new ExitException(status); + } + } + + @Override + protected void setUp() throws Exception { + super.setUp(); + System.setSecurityManager(new NoExitSecurityManager()); + } + + @Override + protected void tearDown() throws Exception { + System.setSecurityManager(null); + super.tearDown(); + } + + //get rid of JUnit warning for this helper class + public void testSomething() { + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenReadingsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenReadingsTest.java new file mode 100644 index 0000000..4bd4ce1 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenReadingsTest.java @@ -0,0 +1,58 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool; + +import junit.framework.TestCase; + +public class AnalyzedTokenReadingsTest extends TestCase { + + public void testNewTags() { + AnalyzedTokenReadings testanaTokRead = new AnalyzedTokenReadings(new AnalyzedToken("word", "POS", "lemma")); + assertEquals(false, testanaTokRead.isLinebreak()); + assertEquals(false, testanaTokRead.isSentEnd()); + assertEquals(false, testanaTokRead.isParaEnd()); + assertEquals(false, testanaTokRead.isSentStart()); + testanaTokRead.setSentEnd(); + assertEquals(false, testanaTokRead.isSentStart()); + assertEquals(true, testanaTokRead.isSentEnd()); + //test SENT_END or PARA_END readings added directly via addReading (not via setSentEnd()), + //which is possible e.g. in the rule disambiguator + testanaTokRead = new AnalyzedTokenReadings(new AnalyzedToken("word", null, "lemma")); + testanaTokRead.addReading(new AnalyzedToken("word", "SENT_END", null)); + assertEquals(true, testanaTokRead.isSentEnd()); + assertEquals(false, testanaTokRead.isParaEnd()); + testanaTokRead.addReading(new AnalyzedToken("word", "PARA_END", null)); + assertEquals(true, testanaTokRead.isParaEnd()); + assertEquals(false, testanaTokRead.isSentStart()); + //but you can't add SENT_START to a non-empty token + //and get isSentStart == true + testanaTokRead.addReading(new AnalyzedToken("word", "SENT_START", null)); + assertEquals(false, testanaTokRead.isSentStart()); + AnalyzedToken aTok = new AnalyzedToken("word", "POS", "lemma"); + aTok.setWhitespaceBefore(true); + testanaTokRead = new AnalyzedTokenReadings(aTok); + assertEquals(aTok, testanaTokRead.getAnalyzedToken(0)); + AnalyzedToken aTok2 = new AnalyzedToken("word", "POS", "lemma"); + assertTrue(!aTok2.equals(testanaTokRead.getAnalyzedToken(0))); + AnalyzedToken aTok3 = new 
AnalyzedToken("word", "POS", "lemma"); + aTok3.setWhitespaceBefore(true); + assertEquals(aTok3, testanaTokRead.getAnalyzedToken(0)); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenTest.java new file mode 100644 index 0000000..66f86a5 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/AnalyzedTokenTest.java @@ -0,0 +1,36 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool; + +import junit.framework.TestCase; + +public class AnalyzedTokenTest extends TestCase { + + public void testToString() { + AnalyzedToken testToken = new AnalyzedToken("word", "POS", "lemma"); + assertEquals("lemma/POS", testToken.toString()); + assertEquals("lemma", testToken.getLemma()); + testToken = new AnalyzedToken("word", "POS", null); + assertEquals("word/POS", testToken.toString()); + assertEquals(null, testToken.getLemma()); + assertEquals("word", testToken.getToken()); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/JLanguageToolTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/JLanguageToolTest.java new file mode 100644 index 0000000..80afa8a --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/JLanguageToolTest.java @@ -0,0 +1,238 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool.paragraphHandling; +import de.danielnaber.languagetool.rules.RuleMatch; +import de.danielnaber.languagetool.rules.patterns.PatternRule; + +/** + * @author Daniel Naber + */ +public class JLanguageToolTest extends TestCase { + + // used on http://www.languagetool.org/usage/ + /* + public void testDemo() throws IOException { + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + langTool.activateDefaultPatternRules(); + List<RuleMatch> matches = langTool.check("A sentence " + + "with a error in the Hitchhiker's Guide tot he Galaxy"); + for (RuleMatch match : matches) { + System.out.println("Potential error at line " + + match.getEndLine() + ", column " + + match.getColumn() + ": " + match.getMessage()); + System.out.println("Suggested correction: " + + match.getSuggestedReplacements()); + } + } + */ + + + public void testEnglish() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.ENGLISH); + List<RuleMatch> matches = tool.check("A test that should not give errors."); + assertEquals(0, matches.size()); + matches = tool.check("A test test that should give errors."); + assertEquals(1, matches.size()); + matches = tool.check("I can give you more a detailed description."); + assertEquals(0, matches.size()); + assertEquals(8, tool.getAllRules().size()); + final List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir() + + "/en/grammar.xml"); + for (PatternRule patternRule : rules) { + tool.addRule(patternRule); + } + assertTrue(tool.getAllRules().size() > 3); + matches = 
tool.check("I can give you more a detailed description."); + assertEquals(1, matches.size()); + tool.disableRule("MORE_A_JJ"); + matches = tool.check("I can give you more a detailed description."); + assertEquals(0, matches.size()); + tool.disableCategory("Possible Typos"); + matches = tool.check("I've go to go."); + assertEquals(0, matches.size()); + } + + public void testGerman() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.GERMAN); + List<RuleMatch> matches = tool.check("Ein Test, der keine Fehler geben sollte."); + assertEquals(0, matches.size()); + matches = tool.check("Ein Test Test, der Fehler geben sollte."); + assertEquals(1, matches.size()); + final List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir() + + "/de/grammar.xml"); + for (PatternRule patternRule : rules) { + tool.addRule(patternRule); + } + tool.setListUnknownWords(true); + // German rule has no effect with English error: + matches = tool.check("I can give you more a detailed description"); + assertEquals(0, matches.size()); + //test unknown words listing + assertEquals("[I, can, detailed, give, more, you]", tool.getUnknownWords().toString()); + } + + public void testDutch() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.DUTCH); + final List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir() + + "/nl/grammar.xml"); + for (PatternRule patternRule : rules) { + tool.addRule(patternRule); + } + List<RuleMatch> matches = tool.check("Een test, die geen fouten mag geven."); + assertEquals(0, matches.size()); + matches = tool.check("Een test test, die een fout moet geven."); + assertEquals(1, matches.size()); + //test uppercasing rule: + /* + matches = tool.check("De Afdeling Beheer kan het"); + assertEquals(1, matches.size()); + assertEquals("Als Afdeling geen deel uitmaakt van de naam, dan is juist:<suggestion>afdeling</suggestion>", matches.get(0).getMessage()); + 
*/ + // Dutch rule has no effect with English error: + matches = tool.check("I can give you more a detailed description"); + assertEquals(0, matches.size()); + } + + public void testPolish() throws IOException { + JLanguageTool tool = new JLanguageTool(Language.POLISH); + assertEquals("[PL]", Arrays.toString(Language.POLISH.getCountryVariants())); + List<RuleMatch> matches = tool.check("To jest całkowicie prawidłowe zdanie."); + assertEquals(0, matches.size()); + matches = tool.check("To jest jest problem."); + assertEquals(1, matches.size()); + //this rule is by default off + matches = tool.check("Był on bowiem pięknym strzelcem bowiem."); + assertEquals(0, matches.size()); + tool.enableDefaultOffRule("PL_WORD_REPEAT"); + matches = tool.check("Był on bowiem pięknym strzelcem bowiem."); + assertEquals(1, matches.size()); + List<PatternRule> rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir() + + "/pl/grammar.xml"); + for (final PatternRule rule : rules) { + tool.addRule(rule); + } + matches = tool.check("Premier drapie się w ucho co i rusz."); + assertEquals(1, matches.size()); + // Polish rule has no effect with English error: + matches = tool.check("I can give you more a detailed description"); + assertEquals(0, matches.size()); + tool.setListUnknownWords(true); + matches = tool.check("This is not a Polish text."); + assertEquals("[Polish, This, is]", tool.getUnknownWords().toString()); + //check positions relative to sentence ends + matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie ma przecinka."); + assertEquals(16, matches.get(0).getColumn()); + //with a space... + matches = tool.check("To jest tekst. \nTest 1. To jest linia w której nie ma przecinka."); + assertEquals(16, matches.get(0).getColumn()); + matches = tool.check("To jest tekst. Test 1. To jest linia w której nie ma przecinka."); + assertEquals(30, matches.get(0).getColumn()); + //recheck with the -b mode... 
+ final Language lang = Language.POLISH; + lang.getSentenceTokenizer().setSingleLineBreaksMarksParagraph( + true); + tool = new JLanguageTool(lang); + rules = tool.loadPatternRules(JLanguageTool.getDataBroker().getRulesDir() + + "/pl/grammar.xml"); + for (final PatternRule rule : rules) { + tool.addRule(rule); + } + matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie ma przecinka."); + assertEquals(16, matches.get(0).getColumn()); + //with a space... + matches = tool.check("To jest tekst. \nTest 1. To jest linia w której nie ma przecinka."); + assertEquals(16, matches.get(0).getColumn()); + matches = tool.check("To jest tekst. To jest linia w której nie ma przecinka."); + assertEquals(23, matches.get(0).getColumn()); + + } + + public void testSlovenian() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.SLOVENIAN); + List<RuleMatch> matches = tool.check("Kupil je npr. jajca, moko in mleko."); + assertEquals(0, matches.size()); + } + + public void testCountLines() { + assertEquals(0, JLanguageTool.countLineBreaks("")); + assertEquals(1, JLanguageTool.countLineBreaks("Hallo,\nnächste Zeile")); + assertEquals(2, JLanguageTool.countLineBreaks("\nZweite\nDritte")); + assertEquals(4, JLanguageTool.countLineBreaks("\nZweite\nDritte\n\n")); + } + + + public void testAnalyzedSentence() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.ENGLISH); + //test soft-hyphen ignoring: + assertEquals("<S> This[this/DT] is[be/VBZ] a[a/DT] tested[tested/JJ,test/VBD,test/VBN,tested] sentence[sentence/NN,sentence/VB,sentence/VBP].[./.,</S>]", tool.getAnalyzedSentence("This is a test\u00aded sentence.").toString()); + //test paragraph ends adding + assertEquals("<S> </S><P/> ", tool.getAnalyzedSentence("\n").toString()); + } + + public void testParaRules() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.ENGLISH); + + //run normally + List<RuleMatch> matches = tool.check("(This is an 
quote.\n It ends in the second sentence."); + assertEquals(2, matches.size()); + assertEquals(2, tool.getSentenceCount()); + + //run in a sentence-only mode + matches = tool.check("(This is an quote.\n It ends in the second sentence.", false, paragraphHandling.ONLYNONPARA); + assertEquals(1, matches.size()); + assertEquals("EN_A_VS_AN", matches.get(0).getRule().getId()); + assertEquals(1, tool.getSentenceCount()); + + //run in a paragraph mode - single sentence + matches = tool.check("(This is an quote.\n It ends in the second sentence.", false, paragraphHandling.ONLYPARA); + assertEquals(1, matches.size()); + assertEquals("EN_UNPAIRED_BRACKETS", matches.get(0).getRule().getId()); + assertEquals(1, tool.getSentenceCount()); + + //run in a paragraph mode - many sentences + matches = tool.check("(This is an quote.\n It ends in the second sentence.", true, paragraphHandling.ONLYPARA); + assertEquals(1, matches.size()); + assertEquals("EN_UNPAIRED_BRACKETS", matches.get(0).getRule().getId()); + assertEquals(2, tool.getSentenceCount()); + } + + public void testWhitespace() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.ENGLISH); + final AnalyzedSentence raw = tool.getRawAnalyzedSentence("Let's do a \"test\", do you understand?"); + final AnalyzedSentence cooked = tool.getAnalyzedSentence("Let's do a \"test\", do you understand?"); + //test if there was a change + assertFalse(raw.equals(cooked)); + //see if nothing has been deleted + assertEquals(raw.getTokens().length, cooked.getTokens().length); + int i = 0; + for (final AnalyzedTokenReadings atr : raw.getTokens()) { + assertEquals(atr.isWhitespaceBefore(), + cooked.getTokens()[i].isWhitespaceBefore()); + i++; + } + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/MainTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/MainTest.java new file mode 100644 index 0000000..b7c35ad --- /dev/null +++ 
b/JLanguageTool/src/test/de/danielnaber/languagetool/MainTest.java @@ -0,0 +1,386 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool; + +import java.io.ByteArrayOutputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; + +import java.io.PrintStream; +import java.net.URISyntaxException; +import java.net.URI; +import java.net.URL; + +import javax.xml.parsers.ParserConfigurationException; +import org.xml.sax.SAXException; + +/** + * Tests the basic features of the command-line interface. 
+ * + * @author Marcin Miłkowski + */ +public class MainTest extends AbstractSecurityTestCase { + + private static final String ENGLISH_TEST_FILE = "test-en.txt"; + + private ByteArrayOutputStream out; + private ByteArrayOutputStream err; + private PrintStream stdout; + private PrintStream stderr; + + public MainTest(String testName) { + super(testName); + } + + public void setUp() throws Exception { + super.setUp(); + this.stdout = System.out; + this.stderr = System.err; + this.out = new ByteArrayOutputStream(); + this.err = new ByteArrayOutputStream(); + System.setOut(new PrintStream(this.out)); + System.setErr(new PrintStream(this.err)); + } + + public void tearDown() throws Exception { + super.tearDown(); + System.setOut(this.stdout); + System.setErr(this.stderr); + } + + public void testUsageMessage() throws IOException, ParserConfigurationException, SAXException { + try { + String[] args = new String[] {"-h"}; + Main.main(args); + fail("LT should have exited with status 1!"); + } + catch (ExitException e) { + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Usage: java de.danielnaber.languagetool.Main [-r|--recursive] [-v|--verbose") != -1); + assertEquals("Exit status", 1, e.status); + } + } + + public void testEnglishFile() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + //System.err.println("###"+url); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "en", uri.getPath()}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + //System.out.println("#>"+output); + assertTrue(output.indexOf("Expected text language: English") == 0); + assertTrue(output.indexOf("1.) 
Line 1, column 9, Rule ID: EN_A_VS_AN") != -1); + } + + public void testEnglishFileVerbose() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "en", "-v", uri.getPath()}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: English") == 0); + assertTrue(output.indexOf("1.) Line 1, column 9, Rule ID: EN_A_VS_AN") != -1); + String tagText = new String(this.err.toByteArray()); + assertTrue(tagText.indexOf("<S> This[this/DT] is[be/VBZ] an[a/DT] test[test/NN].[./.,</S>]") != -1); + } + + public void testEnglishFileApplySuggestions() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "en", "--apply", uri.getPath()}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertEquals("This is a test.\n", output); + } + + + public void testEnglishStdIn1() throws IOException, ParserConfigurationException, SAXException { + final String test = "This is an test."; + final byte[] b = test.getBytes(); + System.setIn(new ByteArrayInputStream(b)); + String[] args = new String[] {"-l", "en"}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: English") == 0); + assertTrue(output.indexOf("1.) 
Line 1, column 9, Rule ID: EN_A_VS_AN") != -1); + } + + public void testEnglishStdIn2() throws IOException, ParserConfigurationException, SAXException { + final String test = "This is an test."; + final byte[] b = test.getBytes(); + System.setIn(new ByteArrayInputStream(b)); + String[] args = new String[] {"-l", "en", "-"}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: English") == 0); + assertTrue(output.indexOf("1.) Line 1, column 9, Rule ID: EN_A_VS_AN") != -1); + } + + public void testEnglishStdIn3() throws IOException, ParserConfigurationException, SAXException { + final String test = "This is an test."; + final byte[] b = test.getBytes(); + System.setIn(new ByteArrayInputStream(b)); + String[] args = new String[] {"-l", "en", "-a", "-"}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertEquals("This is a test.\n", output); + } + + //test line mode vs. para mode + //first line mode + public void testEnglishLineMode() throws IOException, ParserConfigurationException, SAXException { + final String test = "This is what I mean\nand you know it."; + final byte[] b = test.getBytes(); + System.setIn(new ByteArrayInputStream(b)); + String[] args = new String[] {"-l", "en", "-a", "-b", "-"}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertEquals("This is what I mean\nAnd you know it.\n", output); + } + + //then para mode + public void testEnglishParaMode() throws IOException, ParserConfigurationException, SAXException { + final String test = "This is what I mean\nand you know it."; + final byte[] b = test.getBytes(); + System.setIn(new ByteArrayInputStream(b)); + String[] args = new String[] {"-l", "en", "-a", "-"}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertEquals("This is what I mean\nand you know it.\n", output); + } + + public void testPolishStdInDefaultOff() throws 
IOException, ParserConfigurationException, SAXException { + final String test = "To jest test, który zrobiłem, który mi się podoba."; + final byte[] b = test.getBytes(); + System.setIn(new ByteArrayInputStream(b)); + String[] args = new String[] {"-l", "pl", "-e", "PL_WORD_REPEAT", "-"}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: Polish") == 0); + assertTrue(output.indexOf("Working on STDIN...") != -1); + assertTrue(output.indexOf("1.) Line 1, column 31, Rule ID: PL_WORD_REPEAT") != -1); + } + + public void testEnglishFileRuleDisabled() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "en", "-d", "EN_A_VS_AN", uri.getPath()}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: English") == 0); + assertTrue(output.indexOf("Rule ID: EN_A_VS_AN") == -1); + } + + public void testEnglishFileRuleEnabled() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "en", "-e", "EN_A_VS_AN", uri.getPath()}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: English") == 0); + assertTrue(output.indexOf("Rule ID: EN_A_VS_AN") != -1); + } + + public void testEnglishFileAPI() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "en", "--api", uri.getPath()}; + + Main.main(args); + String output = new 
String(this.out.toByteArray()); + assertTrue(output.indexOf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>") == 0); + assertTrue(output.indexOf("<error fromy=\"0\" fromx=\"8\" toy=\"0\" tox=\"11\" ruleId=\"EN_A_VS_AN\" msg=\"Use 'a' instead of 'an' if the following word doesn't start with a vowel sound, e.g. 'a sentence', 'a university'\" replacements=\"a\" context=\"This is an test. \" contextoffset=\"8\" errorlength=\"2\"/>") != -1); + } + + public void testPolishFileAPI() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + // Create a simple plain text file. + File input = File.createTempFile("input", "txt"); + input.deleteOnExit(); + + // Populate the file with data. + PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("To jest świnia która się ślini."); + w.close(); + + String[] args = new String[] {"-l", "pl", "--api", input.getAbsolutePath()}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("<?xml version=\"1.0\" encoding=\"UTF-8\"?>") == 0); + assertTrue(output.indexOf("<error fromy=\"0\" fromx=\"8\" toy=\"0\" tox=\"21\" ruleId=\"BRAK_PRZECINKA_KTORY\" subId=\"5\"") != -1); + //This tests whether XML encoding is actually UTF-8: + assertTrue(output.indexOf("msg=\"Brak przecinka w tym fragmencie zdania. Przecinek prawdopodobnie należy postawić tak: 'świnia, która'.\" replacements=\"świnia, która\" ") != -1); + assertTrue(output.indexOf("context=\"To jest świnia która się ślini. \" contextoffset=\"8\" errorlength=\"12\"/>") != -1); + } + + public void testPolishLineNumbers() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + // Create a simple plain text file. + File input = File.createTempFile("input", "txt"); + input.deleteOnExit(); + + // Populate the file with data. 
+ PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("Test."); + w.println("Test."); + w.println("Test."); + w.println("Test."); + w.println("Test."); + w.println("Test."); + w.println(""); + w.println("Test który wykaże błąd."); + w.close(); + + String[] args = new String[] {"-l", "pl", input.getAbsolutePath()}; + + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: Polish") == 0); + assertTrue(output.indexOf("Line 8, column 1, Rule ID: BRAK_PRZECINKA_KTORY") != -1); + } + + public void testEnglishTagger() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "en", "--taggeronly", uri.getPath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: English") == 0); + assertTrue(output.indexOf("<S> This[this/DT] is[be/VBZ] an[a/DT] test[test/NN].[./.,</S>]") != -1); + } + + public void testBitextMode() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + // Create a simple plain text file. + File input = File.createTempFile("input", "txt"); + input.deleteOnExit(); + + // Populate the file with data. 
+ PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("This is not actual.\tTo nie jest aktualne."); + w.println("Test\tTest"); + w.println("ab\tVery strange data indeed, much longer than input"); + w.close(); + + String[] args = new String[] {"-l", "pl", "--bitext", "-m", "en", input.getAbsolutePath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: Polish") == 0); + assertTrue(output.indexOf( + "Message: Hint: \"aktualny\" (Polish) means \"current\", \"(the) latest\", \"up-to-date\" (English). Did you mean 'rzeczywisty'?") != -1); + assertTrue(output.indexOf("Line 1, column 32, Rule ID: ACTUAL") != -1); + assertTrue(output.indexOf("Line 3, column 4, Rule ID: TRANSLATION_LENGTH") != -1); + } + + public void testBitextModeWithDisabledRule() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + // Create a simple plain text file. + File input = File.createTempFile("input", "txt"); + input.deleteOnExit(); + + // Populate the file with data. + PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("this is not actual.\tTo nie jest aktualne."); + w.println("test\tTest"); + w.println("ab\tVery strange data indeed, much longer than input"); + w.close(); + + String[] args = new String[] {"-l", "pl", "--bitext", "-m", "en", "-d", "UPPERCASE_SENTENCE_START,TRANSLATION_LENGTH", input.getAbsolutePath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: Polish") == 0); + assertTrue(output.indexOf( + "Message: Hint: \"aktualny\" (Polish) means \"current\", \"(the) latest\", \"up-to-date\" (English). 
Did you mean 'rzeczywisty'?") != -1); + assertTrue(output.indexOf("Line 1, column 32, Rule ID: ACTUAL") != -1); + assertTrue(output.indexOf("Rule ID: TRANSLATION_LENGTH") == -1); + } + + public void testBitextModeWithEnabledRule() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + // Create a simple plain text file. + File input = File.createTempFile("input", "txt"); + input.deleteOnExit(); + + // Populate the file with data. + PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("this is not actual.\tTo nie jest aktualne."); + w.println("test\tTest"); + w.println("ab\tVery strange data indeed, much longer than input"); + w.close(); + + String[] args = new String[] {"-l", "pl", "--bitext", "-m", "en", "-e", "TRANSLATION_LENGTH", input.getAbsolutePath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: Polish") == 0); + assertTrue(output.indexOf( + "Message: Hint: \"aktualny\" (Polish) means \"current\", \"(the) latest\", \"up-to-date\" (English). Did you mean 'rzeczywisty'?") == -1); + assertTrue(output.indexOf("Line 1, column 32, Rule ID: ACTUAL") == -1); + assertTrue(output.indexOf("Rule ID: TRANSLATION_LENGTH") != -1); + } + + public void testBitextModeApply() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + // Create a simple plain text file. + File input = File.createTempFile("input", "txt"); + input.deleteOnExit(); + + // Populate the file with data. 
+ PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("There is a dog.\tNie ma psa."); + w.close(); + + String[] args = new String[] {"-l", "pl", "--bitext", "-m", "en", "--apply", input.getAbsolutePath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.startsWith("Istnieje psa.")); + } + + public void testListUnknown() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "pl", "-u", uri.getPath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: Polish") == 0); + assertTrue(output.indexOf("Unknown words: [This, is]") != -1); + } + + public void testNoListUnknown() throws URISyntaxException, IOException, ParserConfigurationException, SAXException { + final URL url = this.getClass().getResource(ENGLISH_TEST_FILE); + final URI uri = new URI (url.toString()); + String[] args = new String[] {"-l", "pl", uri.getPath()}; + Main.main(args); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Expected text language: Polish") == 0); + assertTrue(output.indexOf("Unknown words: [This, is]") == -1); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/TestTools.java b/JLanguageTool/src/test/de/danielnaber/languagetool/TestTools.java new file mode 100644 index 0000000..a9a669c --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/TestTools.java @@ -0,0 +1,233 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software 
Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.ResourceBundle; + +import junit.framework.Assert; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; +import de.danielnaber.languagetool.tools.StringTools; + +/** + * @author Daniel Naber + */ +public final class TestTools { + + private TestTools() { + } + + public static ResourceBundle getEnglishMessages() { + return getMessages("en"); + } + + /** + * Gets the resource bundle for the specified language. + * @param language lowercase two-letter ISO-639 code. + * @return the resource bundle for the specified language. 
+ */ + public static ResourceBundle getMessages(String language) { + final ResourceBundle messages = ResourceBundle.getBundle( + "de.danielnaber.languagetool.MessagesBundle", new Locale(language)); + return messages; + } + + public static void testSplit(final String[] sentences, + final SentenceTokenizer stokenizer) { + final StringBuilder inputString = new StringBuilder(); + final List<String> input = new ArrayList<String>(); + for (final String sentence : sentences) { + input.add(sentence); + } + for (final String string : input) { + inputString.append(string); + } + Assert.assertEquals(input, stokenizer.tokenize(inputString.toString())); + } + + public static void myAssert(final String input, final String expected, + final Tokenizer tokenizer, final Tagger tagger) throws IOException { + final List<String> tokens = tokenizer.tokenize(input); + final List<String> noWhitespaceTokens = new ArrayList<String>(); + // whitespace confuses tagger, so give it the tokens but no whitespace + // tokens: + for (final String token : tokens) { + if (isWord(token)) { + noWhitespaceTokens.add(token); + } + } + final List<AnalyzedTokenReadings> output = tagger.tag(noWhitespaceTokens); + final StringBuffer outputStr = new StringBuffer(); + for (final Iterator<AnalyzedTokenReadings> iter = output.iterator(); iter + .hasNext();) { + final AnalyzedTokenReadings token = iter.next(); + final int readingsNumber = token.getReadingsLength(); + final List<String> readings = new ArrayList<String>(); + for (int j = 0; j < readingsNumber; j++) { + final StringBuffer readingStr = new StringBuffer(); + readingStr.append(token.getAnalyzedToken(j).getToken()); + readingStr.append("/["); + readingStr.append(token.getAnalyzedToken(j).getLemma()); + readingStr.append(']'); + readingStr.append(token.getAnalyzedToken(j).getPOSTag()); + readings.add(readingStr.toString()); + } + // force some order on the result just for the test case - order may vary + // from one version of the lexicon to the next: + 
Collections.sort(readings); + outputStr.append(StringTools.listToString(readings, "|")); + if (iter.hasNext()) { + outputStr.append(" -- "); + } + } + Assert.assertEquals(expected, outputStr.toString()); + } + + public static void myAssert(final String input, final String expected, + final Tokenizer tokenizer, final SentenceTokenizer sentenceTokenizer, + final Tagger tagger, final Disambiguator disambiguator) + throws IOException { + final StringBuffer outputStr = new StringBuffer(); + final List<String> sentences = sentenceTokenizer.tokenize(input); + for (final String sentence : sentences) { + final List<String> tokens = tokenizer.tokenize(sentence); + final List<String> noWhitespaceTokens = new ArrayList<String>(); + // whitespace confuses tagger, so give it the tokens but no whitespace + // tokens: + for (final String token : tokens) { + if (isWord(token)) { + noWhitespaceTokens.add(token); + } + } + final List<AnalyzedTokenReadings> aTokens = tagger + .tag(noWhitespaceTokens); + final AnalyzedTokenReadings[] tokenArray = new AnalyzedTokenReadings[tokens + .size() + 1]; + final AnalyzedToken[] startTokenArray = new AnalyzedToken[1]; + int toArrayCount = 0; + final AnalyzedToken sentenceStartToken = new AnalyzedToken("", + "SENT_START", null); + startTokenArray[0] = sentenceStartToken; + tokenArray[toArrayCount++] = new AnalyzedTokenReadings(startTokenArray, 0); + int startPos = 0; + int noWhitespaceCount = 0; + for (final String tokenStr : tokens) { + AnalyzedTokenReadings posTag = null; + if (isWord(tokenStr)) { + posTag = aTokens.get(noWhitespaceCount); + posTag.setStartPos(startPos); + noWhitespaceCount++; + } else { + posTag = tagger.createNullToken(tokenStr, startPos); + } + tokenArray[toArrayCount++] = posTag; + startPos += tokenStr.length(); + } + + AnalyzedSentence finalSentence = new AnalyzedSentence(tokenArray); + // disambiguate assigned tags + finalSentence = disambiguator.disambiguate(finalSentence); + + final AnalyzedTokenReadings[] output = 
finalSentence.getTokens(); + + for (int i = 0; i < output.length; i++) { + final AnalyzedTokenReadings token = output[i]; + final int readingsNumber = token.getReadingsLength(); + final List<String> readings = new ArrayList<String>(); + for (int j = 0; j < readingsNumber; j++) { + final StringBuffer readingStr = new StringBuffer(); + readingStr.append(token.getAnalyzedToken(j).getToken()); + readingStr.append("/["); + readingStr.append(token.getAnalyzedToken(j).getLemma()); + readingStr.append(']'); + readingStr.append(token.getAnalyzedToken(j).getPOSTag()); + readings.add(readingStr.toString()); + } + // force some order on the result just for the test case - order may vary + // from one version of the lexicon to the next: + Collections.sort(readings); + outputStr.append(StringTools.listToString(readings, "|")); + if (i < output.length - 1) { + outputStr.append(' '); + } + } + } + Assert.assertEquals(expected, outputStr.toString()); + } + + public static boolean isWord(final String token) { + for (int i = 0; i < token.length(); i++) { + final char c = token.charAt(i); + if (Character.isLetter(c) || Character.isDigit(c)) { + return true; + } + } + return false; + } + + /** + * Used to call private methods for testing + * + * @param targetClass + * Class under test + * @param methodName + * Name of the method under test + * @param argClasses + * Types of arguments + * @param argObjects + * Values of arguments + * @throws InvocationTargetException + * @throws IllegalAccessException + * @throws IllegalArgumentException + * @throws NoSuchMethodException + * @throws SecurityException + */ + public static void callStaticMethod(final Class targetClass, + final String methodName, final Class[] argClasses, + final Object[] argObjects) throws InvocationTargetException, + IllegalArgumentException, IllegalAccessException, SecurityException, + NoSuchMethodException { + + final Method method = targetClass.getDeclaredMethod(methodName, argClasses); + method.setAccessible(true); + 
method.invoke(null, argObjects); + } + + public static String callStringStaticMethod(final Class targetClass, + final String methodName, final Class[] argClasses, + final Object[] argObjects) throws InvocationTargetException, + IllegalArgumentException, IllegalAccessException, SecurityException, + NoSuchMethodException { + + final Method method = targetClass.getDeclaredMethod(methodName, argClasses); + method.setAccessible(true); + return (String) method.invoke(null, argObjects); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/TranslationTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/TranslationTest.java new file mode 100644 index 0000000..fe4b5e2 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/TranslationTest.java @@ -0,0 +1,117 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +import de.danielnaber.languagetool.tools.StringTools; + +import junit.framework.TestCase; + +/** + * Check if the translations seem to be complete. + * + * @author Daniel Naber + */ +public class TranslationTest extends TestCase { + + public void testTranslationKeyExistence() throws IOException { + // use English version as the reference: + File englishFile = getTranslationFile(Language.ENGLISH); + Properties enProps = new Properties(); + enProps.load(new FileInputStream(englishFile)); + Set<Object> englishKeys = enProps.keySet(); + for (int i = 0; i < Language.LANGUAGES.length; i++) { + Language lang = Language.LANGUAGES[i]; + if (lang == Language.ENGLISH || lang == Language.DEMO) + continue; + Properties langProps = new Properties(); + File langFile = getTranslationFile(lang); + if (!langFile.exists()) + continue; + langProps.load(new FileInputStream(langFile)); + Set<Object> langKeys = langProps.keySet(); + for (Object englishKey : englishKeys) { + if (!langKeys.contains(englishKey)) { + System.err.println("***** No key '" + englishKey + "' in file " + langFile); + } + } + } + } + + /** + * Make sure values are not empty. 
+ */ + public void testTranslationsAreNotEmpty() throws IOException { + for (int i = 0; i < Language.LANGUAGES.length; i++) { + Language lang = Language.LANGUAGES[i]; + if (lang == Language.DEMO) + continue; + File file = getTranslationFile(lang); + if (!file.exists()) { + System.err.println("Note: no translation available for " + lang); + continue; + } + List<String> lines = loadFile(file); + for (String line : lines) { + line = line.trim(); + if (StringTools.isEmpty(line) || line.charAt(0)=='#') + continue; + String[] parts = line.split("="); + if (parts.length < 2) { + System.err.println("***** Empty translation: '" + line + "' in file " + file); + //fail("Empty translation: '" + line + "' in file " + file); + } + } + } + } + + private List<String> loadFile(File file) throws IOException { + List<String> l = new ArrayList<String>(); + FileReader fr = null; + BufferedReader br = null; + try { + fr = new FileReader(file); + br = new BufferedReader(fr); + String line; + while ((line = br.readLine()) != null) { + l.add(line); + } + } finally { + if (br != null) br.close(); + if (fr != null) fr.close(); + } + return l; + } + + private File getTranslationFile(Language lang) { + return new File("src" + File.separator + "java" + File.separator + + "de" + File.separator + "danielnaber" + File.separator + "languagetool" + + File.separator + "MessagesBundle_" + lang.getShortName() + ".properties"); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/ValidateXMLTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/ValidateXMLTest.java new file mode 100644 index 0000000..cf74a6b --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/ValidateXMLTest.java @@ -0,0 +1,72 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as 
published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool; + +import java.io.IOException; + +import junit.framework.TestCase; + +public class ValidateXMLTest extends TestCase { + + public void testPatternFile() throws IOException { + XMLValidator validator = new XMLValidator(); + for (int i = 0; i < Language.LANGUAGES.length; i++) { + Language lang = Language.LANGUAGES[i]; + String grammarFile = JLanguageTool.getDataBroker().getRulesDir() + "/" + lang.getShortName() + "/grammar.xml"; + validator.validate(grammarFile, JLanguageTool.getDataBroker().getRulesDir() + "/rules.xsd"); + } + } + + public void testFalseFriendsXML() throws IOException { + XMLValidator validator = new XMLValidator(); + validator.validate(JLanguageTool.getDataBroker().getRulesDir() + "/false-friends.xml", + JLanguageTool.getDataBroker().getRulesDir() + "/false-friends.dtd", "rules"); + } + + public void testDisambiguationRuleFile() throws IOException { + XMLValidator validator = new XMLValidator(); + //for (int i = 0; i < Language.LANGUAGES.length; i++) { + // Language lang = Language.LANGUAGES[i]; + Language lang = Language.FRENCH; + String grammarFile = JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortName() + "/disambiguation.xml"; + validator.validate(grammarFile, JLanguageTool.getDataBroker().getResourceDir() + "/disambiguation.xsd"); + lang = Language.ENGLISH; + grammarFile = 
JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortName() + "/disambiguation.xml"; + validator.validate(grammarFile, JLanguageTool.getDataBroker().getResourceDir() + "/disambiguation.xsd"); + lang = Language.DUTCH; + grammarFile = JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortName() + "/disambiguation.xml"; + validator.validate(grammarFile, JLanguageTool.getDataBroker().getResourceDir() + "/disambiguation.xsd"); + lang = Language.POLISH; + grammarFile = JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortName() + "/disambiguation.xml"; + validator.validate(grammarFile, JLanguageTool.getDataBroker().getResourceDir() + "/disambiguation.xsd"); + // } + } + + /** + * Validate XML files, as a help for people developing rules that are not programmers. + */ + public static void main(final String[] args) throws IOException { + final ValidateXMLTest prt = new ValidateXMLTest(); + System.out.println("Validating XML grammar files ..."); + prt.testPatternFile(); + prt.testFalseFriendsXML(); + prt.testDisambiguationRuleFile(); + System.out.println("Validation tests successful."); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/VersionNumberTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/VersionNumberTest.java new file mode 100644 index 0000000..d60d387 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/VersionNumberTest.java @@ -0,0 +1,57 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +public class VersionNumberTest extends TestCase { + + public void testVersionNumber() throws IOException { + String buildFile = load("build.properties"); + Pattern p1 = Pattern.compile("version = ([0-9\\.]+(-dev)?)"); + Matcher m1 = p1.matcher(buildFile); + m1.find(); + String javaFile = load("src/java/de/danielnaber/languagetool/JLanguageTool.java"); + Pattern p2 = Pattern.compile("VERSION = \"(.*?)\""); + Matcher m2 = p2.matcher(javaFile); + m2.find(); + assertEquals(m1.group(1), m2.group(1)); + //System.out.println(m1.group(1)); + } + + private String load(String filename) throws IOException { + FileReader fr = new FileReader(filename); + BufferedReader br = new BufferedReader(fr); + StringBuffer sb = new StringBuffer(); + String line; + while ((line = br.readLine()) != null) { + sb.append(line); + } + br.close(); + fr.close(); + return sb.toString(); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/XMLValidator.java b/JLanguageTool/src/test/de/danielnaber/languagetool/XMLValidator.java new file mode 100644 index 0000000..eaf606e --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/XMLValidator.java @@ -0,0 +1,158 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber 
(http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool; + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.net.URL; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.XMLConstants; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.stream.StreamSource; +import javax.xml.validation.Schema; +import javax.xml.validation.SchemaFactory; +import javax.xml.validation.Validator; + +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.helpers.DefaultHandler; + +import de.danielnaber.languagetool.tools.StringTools; + +/** + * Validate XML files with a given DTD. + * + * @author Daniel Naber + */ +public final class XMLValidator { + + public XMLValidator() { + } + + /** + * Check some limits of our simplified XML output. 
+ */ + public void checkSimpleXMLString(String xml) throws IOException { + Pattern p = Pattern.compile("(<error.*?/>)", Pattern.DOTALL|Pattern.MULTILINE); + Matcher matcher = p.matcher(xml); + int pos = 0; + while (matcher.find(pos)) { + String errorElement = matcher.group(); + pos = matcher.end(); + if (errorElement.contains("\n") || errorElement.contains("\r")) + throw new IOException("<error ...> may not contain line breaks"); + char beforeError = xml.charAt(matcher.start()-1); + if (beforeError != '\n' && beforeError != '\r') + throw new IOException("Each <error ...> must start on a new line"); + } + } + + /** + * Validate XML with the given DTD. Throws exception on error. + */ + public void validateXMLString(String xml, String dtdFile, String docType) throws SAXException, IOException, ParserConfigurationException { + validateInternal(xml, dtdFile, docType); + } + + /** + * Validate XML file with the given DTD. Throws exception on error. + */ + public final void validate(String filename, String dtdFile, String docType) throws IOException { + try { + String xml = StringTools.readFile(this.getClass().getResourceAsStream(filename), "utf-8"); + validateInternal(xml, dtdFile, docType); + } catch (Exception e) { + IOException ioe = new IOException("Cannot load or parse '"+filename+"'"); + ioe.initCause(e); + throw ioe; + } + } + + /** + * Validate XML file using the given XSD. Throws an exception on error + * @param filename File to validate. + * @param xmlSchema Schema to use. + * @throws IOException Thrown on error. 
+ */ + public final void validate(String filename, String xmlSchema) throws IOException { + try { + validateInternal(this.getClass().getResourceAsStream(filename), + this.getClass().getResource(xmlSchema)); + } catch (Exception e) { + IOException ioe = new IOException("Cannot load or parse '"+filename+"'"); + ioe.initCause(e); + throw ioe; + } + } + + private void validateInternal(String xml, String dtdFile, String doctype) throws SAXException, IOException, ParserConfigurationException { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setValidating(true); + SAXParser saxParser = factory.newSAXParser(); + //used for removing existing DOCTYPE from grammar.xml files + xml = xml.replaceAll("<!DOCTYPE.+>", ""); + final String decl = "<?xml version=\"1.0\""; + final String endDecl = "?>"; + final String dtd = "<!DOCTYPE "+doctype+" PUBLIC \"-//W3C//DTD Rules 0.1//EN\" \"" +this.getClass().getResource(dtdFile)+ "\">"; + int pos = xml.indexOf(decl); + int endPos = xml.indexOf(endDecl); + if (pos == -1) + throw new IOException("No XML declaration found in '" + xml.substring(0, Math.min(100, xml.length())) + "...'"); + String newXML = xml.substring(0, endPos+endDecl.length()) + "\r\n" + dtd + xml.substring(endPos+endDecl.length()); + //System.err.println(newXML); + InputSource is = new InputSource(new StringReader(newXML)); + saxParser.parse(is, new ErrorHandler()); + } + + private void validateInternal(InputStream xml, URL xmlSchema) throws SAXException, IOException, ParserConfigurationException { + SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); + Schema schema = sf.newSchema(xmlSchema); + Validator validator = schema.newValidator(); + validator.setErrorHandler(new ErrorHandler()); + validator.validate(new StreamSource(xml)); + } + +} + +/** + * XML handler that throws exception on error and warning, does nothing otherwise. 
/**
 * SAX handler that treats warnings and errors alike: the problem is printed
 * to standard error with its line and column, then rethrown. All other
 * callbacks keep the DefaultHandler behavior.
 */
class ErrorHandler extends DefaultHandler {

  @Override
  public void warning(final SAXParseException e) throws SAXException {
    reportAndRethrow(e);
  }

  @Override
  public void error(final SAXParseException e) throws SAXException {
    reportAndRethrow(e);
  }

  /** Prints the problem with its position and throws the same exception. */
  private static void reportAndRethrow(final SAXParseException problem) throws SAXException {
    final String position = " Problem found at line " + problem.getLineNumber()
        + ", column " + problem.getColumnNumber() + ".";
    System.err.println(problem.getMessage() + position);
    throw problem;
  }

}
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.bitext; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; + +import junit.framework.TestCase; + +public class TabBitextReaderTest extends TestCase { + + public void testReader() throws Exception { + // Create a simple plain text file. + File input = File.createTempFile("input", "txt"); + input.deleteOnExit(); + + // Populate the file with data. + PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("This is not actual.\tTo nie jest aktualne."); + w.println("Test\tTest"); + w.println("ab\tVery strange data indeed, much longer than input"); + w.close(); + + TabBitextReader reader = new TabBitextReader(input.getAbsolutePath(), "UTF-8"); + int i = 1; + for (StringPair srcAndTrg : reader) { + assertTrue(srcAndTrg.getSource() != null); + assertTrue(srcAndTrg.getTarget() != null); + if (i == 1) { + assertEquals("This is not actual.", srcAndTrg.getSource()); + } else if (i == 2) { + assertEquals("Test", srcAndTrg.getSource()); + } else if (i == 3) { + assertEquals("Very strange data indeed, much longer than input", + srcAndTrg.getTarget()); + } + i++; + } + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/WordFastTMReaderTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/WordFastTMReaderTest.java new file mode 100644 index 0000000..b044fc0 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/bitext/WordFastTMReaderTest.java @@ -0,0 +1,56 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org) + * + * This library is free software; you can redistribute it 
and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.bitext; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; + +import junit.framework.TestCase; + +public class WordFastTMReaderTest extends TestCase { + + public void testReader() throws Exception { + // Create a simple WordFast text memory. + File input = File.createTempFile("input", ".txt"); + input.deleteOnExit(); + + // Populate the file with data. 
+ PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8")); + w.println("%20100801~111517\t%UserID,AHLJat,AHLJat\t%TU=00008580\t%EN-US\t%Wordfast TM v.546/00\t%PL-PL\t%\t."); + w.println("20100727~145333\tAHLJat\t2\tEN-US\tObjection:\tPL-PL\tZarzut: "); + w.println("20100727~051350\tAHLJat\t2\tEN-US\tWhy not?&tA;\tPL-PL\tDlaczego nie?&tA; "); + w.close(); + + WordFastTMReader reader = new WordFastTMReader(input.getAbsolutePath(), "UTF-8"); + int i = 1; + for (StringPair srcAndTrg : reader) { + assertTrue(srcAndTrg.getSource() != null); + assertTrue(srcAndTrg.getTarget() != null); + if (i == 1) { + assertEquals("Objection:", srcAndTrg.getSource()); + } else if (i == 2) { + assertEquals("Why not?&tA;", srcAndTrg.getSource()); + } + i++; + } + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/openoffice/MainTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/openoffice/MainTest.java new file mode 100644 index 0000000..863e8ac --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/openoffice/MainTest.java @@ -0,0 +1,37 @@ +package de.danielnaber.languagetool.openoffice; + +import junit.framework.TestCase; + +import com.sun.star.lang.Locale; +import com.sun.star.linguistic2.ProofreadingResult; +import com.sun.star.beans.PropertyValue; + +public class MainTest extends TestCase { + + public void testDoProofreading() { + Main prog = new Main(null); + final String testString = "To jest trudne zdanie. A to następne. A to przedostatnie jest.\u0002 Test ostatniego."; + final Locale plLoc = new Locale("pl", "PL", ""); + final PropertyValue[] prop = new PropertyValue[0]; + for (int i = 0; i<=testString.length(); i++) { + ProofreadingResult paRes = prog.doProofreading("1", testString, plLoc, i, testString.length(), prop); + assertEquals("1", paRes.aDocumentIdentifier); + assertTrue(paRes.nStartOfNextSentencePosition >= i); + if (i < "To jest trudne zdanie. 
".length()) { + assertEquals("To jest trudne zdanie. ".length(), paRes.nStartOfNextSentencePosition); + assertEquals(0, paRes.nStartOfSentencePosition); + } + } + ProofreadingResult paRes = prog.doProofreading("1", testString, plLoc, 0, testString.length(), prop); + assertEquals("1", paRes.aDocumentIdentifier); + assertEquals(23, paRes.nStartOfNextSentencePosition); + assertEquals(0, paRes.nStartOfSentencePosition); + //that was causing NPE but not anymore: + String testString2 = "To jest „nowy problem”. A to inny jeszcze( „problem. Co jest „?"; + paRes = prog.doProofreading("1", testString2, plLoc, 0, testString2.length(), prop); + assertEquals("1", paRes.aDocumentIdentifier); + assertEquals(24, paRes.nStartOfNextSentencePosition); + assertEquals(0, paRes.nStartOfSentencePosition); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CommaWhitespaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CommaWhitespaceRuleTest.java new file mode 100644 index 0000000..6a069ff --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CommaWhitespaceRuleTest.java @@ -0,0 +1,102 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; + +/** + * @author Daniel Naber + */ +public class CommaWhitespaceRuleTest extends TestCase { + + public void testRule() throws IOException { + CommaWhitespaceRule rule = new CommaWhitespaceRule(TestTools.getEnglishMessages()); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("This is a test sentence.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("This, is, a test sentence.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("This (foo bar) is a test(!).")).length); + //we get only entities into the comma rule, so let's test for entities: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("&quot;This is it,&quot; he said.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das kostet €2,45.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das kostet 50,- Euro")).length); + //test OpenOffice field codes: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("In his book,\u0002 Einstein proved this to be true.")).length); + + //test thousand separators: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("This is $1,000,000.")).length); + //test numbers: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("This is 1,5.")).length); + + //test two consecutive commas: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("This is a ,,test''.")).length); + + // errors: +
matches = rule.match(langTool.getAnalyzedSentence("This,is a test sentence.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This , is a test sentence.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This ,is a test sentence.")); + assertEquals(2, matches.length); + matches = rule.match(langTool.getAnalyzedSentence(",is a test sentence.")); + assertEquals(2, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This ( foo bar) is a test(!).")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This (foo bar ) is a test(!).")); + assertEquals(1, matches.length); + + //other brackets, first [ + matches = rule.match(langTool.getAnalyzedSentence("This [ foo bar) is a test(!).")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This (foo bar ] is a test(!).")); + assertEquals(1, matches.length); + //now { + matches = rule.match(langTool.getAnalyzedSentence("This { foo bar) is a test(!).")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This (foo bar } is a test(!).")); + assertEquals(1, matches.length); + + //full stop error: + matches = rule.match(langTool.getAnalyzedSentence("This is a sentence with an orphaned full stop .")); + assertEquals(1, matches.length); + //full stop exception cases: + matches = rule.match(langTool.getAnalyzedSentence("This is a sentence with ellipsis ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is a figure: .5 and it's correct.")); + assertEquals(0, matches.length); + + matches = rule.match(langTool.getAnalyzedSentence("ABB ( z.B. 
)")); + // check match positions: + assertEquals(2, matches.length); + assertEquals(4, matches[0].getFromPos()); + assertEquals(6, matches[0].getToPos()); + assertEquals(11, matches[1].getFromPos()); + assertEquals(13, matches[1].getToPos()); + matches = rule.match(langTool.getAnalyzedSentence("This is a test with a OOo footnote\u0002, which is denoted by 0x2 in the text.")); + assertEquals(0, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CompoundRuleTestAbs.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CompoundRuleTestAbs.java new file mode 100644 index 0000000..92b50db --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/CompoundRuleTestAbs.java @@ -0,0 +1,78 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; + +/** + * Abstract test case for CompoundRule. <br/> + * Based on an original version for [en] and [pl]. 
+ * + * @author Daniel Naber + * + */ +public abstract class CompoundRuleTestAbs extends TestCase { + + // the object used for checking text against different rules + protected JLanguageTool langTool; + // the rule that checks that compounds (if in the list) are not written as separate words. Language specific. + protected AbstractCompoundRule rule; + + protected void setUp() throws Exception { + super.setUp(); + // concrete classes will initialize langTool and rule variables here. + } + + public void check(int expectedErrors, String text) throws IOException { + check(expectedErrors, text, null); + } + + /** + * Check the text against the compound rule. + * @param expectedErrors the number of expected errors. + * @param text the text to check. + * @param expSuggestions the expected suggestions. + * @throws IOException thrown by JLanguageTool. + */ + public void check(int expectedErrors, String text, String[] expSuggestions) throws IOException { + assertNotNull("Please initialize langTool!", langTool); + assertNotNull("Please initialize 'rule'!", rule); + final RuleMatch[] ruleMatches = rule.match(langTool.getAnalyzedSentence(text)); + assertEquals(expectedErrors, ruleMatches.length); + if (expSuggestions != null && expectedErrors != 1) { + throw new RuntimeException("Sorry, test case can only check suggestion if there's one rule match"); + } + if (expSuggestions != null) { + final RuleMatch ruleMatch = ruleMatches[0]; + assertEquals(String.format("Got these suggestions: %s, expected %d ", ruleMatch.getSuggestedReplacements(), expSuggestions.length), + expSuggestions.length, ruleMatch.getSuggestedReplacements().size()); + int i = 0; + for (final Object element : ruleMatch.getSuggestedReplacements()) { + final String suggestion = (String) element; + //System.err.println(">>"+suggestion); + assertEquals(expSuggestions[i], suggestion); + i++; + } + } + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/DoublePunctuationRuleTest.java
b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/DoublePunctuationRuleTest.java new file mode 100644 index 0000000..fc08de0 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/DoublePunctuationRuleTest.java @@ -0,0 +1,55 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; + +/** + * @author Daniel Naber + */ +public class DoublePunctuationRuleTest extends TestCase { + + public void testRule() throws IOException { + DoublePunctuationRule rule = new DoublePunctuationRule(TestTools.getEnglishMessages()); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence... 
More stuff....")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence..... More stuff....")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This, is, a test sentence.")); + assertEquals(0, matches.length); + + // errors: + matches = rule.match(langTool.getAnalyzedSentence("This,, is a test sentence.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence.. Another sentence")); + assertEquals(1, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRuleTest.java new file mode 100644 index 0000000..46da891 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRuleTest.java @@ -0,0 +1,159 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2008 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; + +public class GenericUnpairedBracketsRuleTest extends TestCase { + + public void testRuleGerman() throws IOException { + GenericUnpairedBracketsRule rule = new GenericUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.GERMAN); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.GERMAN); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("(Das sind die Sätze, die die testen sollen).")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule + .match(langTool.getAnalyzedSentence("Die „Sätze zum testen.")); + assertEquals(1, matches.length); + } + + public void testRuleSpanish() throws IOException { + GenericUnpairedBracketsRule rule = new GenericUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.SPANISH); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.SPANISH); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("Soy un hombre (muy honrado).")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("De dónde vas?")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("¡Atención")); + assertEquals(1, matches.length); + } + + public void testRuleFrench() throws IOException { + GenericUnpairedBracketsRule rule = new GenericUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.FRENCH); + RuleMatch[] matches; + JLanguageTool 
langTool = new JLanguageTool(Language.FRENCH); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("(Qu'est ce que c'est ?)")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule + .match(langTool.getAnalyzedSentence("(Qu'est ce que c'est ?")); + assertEquals(1, matches.length); + } + + public void testRuleDutch() throws IOException { + GenericUnpairedBracketsRule rule = new GenericUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.DUTCH); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.DUTCH); + // correct sentences: + matches = rule + .match(langTool + .getAnalyzedSentence("Het centrale probleem van het werk is de ‘dichterlijke kuischheid’.")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule + .match(langTool + .getAnalyzedSentence("Het centrale probleem van het werk is de „dichterlijke kuischheid.")); + assertEquals(1, matches.length); + } + + public void testRuleRomanian() throws IOException { + GenericUnpairedBracketsRule rule = new GenericUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.ROMANIAN); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ROMANIAN); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat (pentru puțin timp).")); + assertEquals(0, matches.length); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("Nu's de prin locurile astea.")); + assertEquals(0, matches.length); + // cross-bracket matching + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost )plecat( pentru (puțin timp).")); + assertEquals(2, matches.length); + // cross-bracket matching + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost {plecat) pentru (puțin timp}.")); + assertEquals(4, matches.length); + // correct sentences: + matches = rule.match(langTool + 
.getAnalyzedSentence("A fost plecat pentru „puțin timp”.")); + assertEquals(0, matches.length); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat „pentru... puțin timp”.")); + assertEquals(0, matches.length); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat „pentru... «puțin» timp”.")); + assertEquals(0, matches.length); + // correct sentences ( " is _not_ a Romanian symbol - just + // ignore it, the correct form is [„] (start quote) and [”] (end quote) + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat \"pentru puțin timp.")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat „pentru... puțin timp.")); + assertEquals(1, matches.length); + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("A fost plecat «puțin.")); + assertEquals(1, matches.length); + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat „pentru «puțin timp”.")); + assertEquals(3, matches.length); + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat „pentru puțin» timp”.")); + assertEquals(3, matches.length); + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("A fost plecat „pentru... puțin» timp”.")); + assertEquals(3, matches.length); + // cross-bracket matching + // incorrect sentences: + matches = rule + .match(langTool + .getAnalyzedSentence("A fost plecat „pentru... 
«puțin” timp».")); + assertEquals(4, matches.length); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/UppercaseSentenceStartRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/UppercaseSentenceStartRuleTest.java new file mode 100644 index 0000000..ce42345 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/UppercaseSentenceStartRuleTest.java @@ -0,0 +1,98 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.io.IOException; +import java.util.List; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; + +/** + * @author Daniel Naber + */ +public class UppercaseSentenceStartRuleTest extends TestCase { + + public void testRule() throws IOException { + JLanguageTool langTool = new JLanguageTool(Language.GERMAN); + List<RuleMatch> matches; + + matches = langTool.check("Dies ist ein Satz. Und hier kommt noch einer"); + assertEquals(0, matches.size()); + matches = langTool.check("Dies ist ein Satz. 
Ätsch, noch einer mit Umlaut."); + assertEquals(0, matches.size()); + matches = langTool.check("Dieser Satz ist bspw. okay so."); + assertEquals(0, matches.size()); + matches = langTool.check("Dieser Satz ist z.B. okay so."); + assertEquals(0, matches.size()); + matches = langTool.check("Dies ist ein Satz. \"Aber der hier auch!\"."); + assertEquals(0, matches.size()); + matches = langTool.check("\"Dies ist ein Satz!\""); + assertEquals(0, matches.size()); + matches = langTool.check("'Dies ist ein Satz!'"); + assertEquals(0, matches.size()); + + matches = langTool.check("Sehr geehrte Frau Merkel,\nwie wir Ihnen schon früher mitgeteilt haben..."); + assertEquals(0, matches.size()); + + matches = langTool.check("Dies ist ein Satz. und hier kommt noch einer"); + assertEquals(1, matches.size()); + matches = langTool.check("Dies ist ein Satz. ätsch, noch einer mit Umlaut."); + assertEquals(1, matches.size()); + matches = langTool.check("Dies ist ein Satz. \"aber der hier auch!\""); + assertEquals(1, matches.size()); + matches = langTool.check("\"dies ist ein Satz!\""); + assertEquals(1, matches.size()); + matches = langTool.check("'dies ist ein Satz!'"); + assertEquals(1, matches.size()); + + langTool = new JLanguageTool(Language.ENGLISH); + matches = langTool.check("In Nov. 
next year."); + assertEquals(0, matches.size()); + } + + public void testDutchSpecialCases() throws IOException { + JLanguageTool langTool = new JLanguageTool(Language.DUTCH); + List<RuleMatch> matches; + + matches = langTool.check("A sentence."); + assertEquals(0, matches.size()); + matches = langTool.check("'s Morgens..."); + assertEquals(0, matches.size()); + + matches = langTool.check("a sentence."); + assertEquals(1, matches.size()); + matches = langTool.check("'s morgens..."); + assertEquals(1, matches.size()); + matches = langTool.check("s sentence."); + assertEquals(1, matches.size()); + } + + public void testPolishSpecialCases() throws IOException { + JLanguageTool langTool = new JLanguageTool(Language.POLISH); + List<RuleMatch> matches; + + matches = langTool.check("Zdanie."); + assertEquals(0, matches.size()); + matches = langTool.check("To jest lista punktowana:\n\npunkt pierwszy,\n\npunkt drugi,\n\npunkt trzeci."); + assertEquals(0, matches.size()); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WhitespaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WhitespaceRuleTest.java new file mode 100644 index 0000000..152dd07 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WhitespaceRuleTest.java @@ -0,0 +1,75 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; + +/** + * + * @author Marcin Milkowski + */ +public class WhitespaceRuleTest extends TestCase { + + public void testRule() throws IOException { + WhitespaceRule rule = new WhitespaceRule(TestTools.getEnglishMessages(), Language.ENGLISH); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("\n\tThis is a test sentence...")); + assertEquals(0, matches.length); + + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("This  is a test sentence.")); + assertEquals(1, matches.length); + assertEquals(4, matches[0].getFromPos()); + assertEquals(6, matches[0].getToPos()); + matches = rule.match(langTool.getAnalyzedSentence("This is a test   sentence.")); + assertEquals(1, matches.length); + assertEquals(14, matches[0].getFromPos()); + assertEquals(17, matches[0].getToPos()); + matches = rule.match(langTool.getAnalyzedSentence("This is   a  test   sentence.")); + assertEquals(3, matches.length); + assertEquals(7, matches[0].getFromPos()); + assertEquals(10, matches[0].getToPos()); + assertEquals(11, matches[1].getFromPos()); + assertEquals(13, matches[1].getToPos()); + assertEquals(17, matches[2].getFromPos()); + assertEquals(20,
matches[2].getToPos()); + matches = rule.match(langTool.getAnalyzedSentence("\t\t\t \t\t\t\t ")); + assertEquals(1, matches.length); + langTool = new JLanguageTool(Language.POLISH); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("To jest test.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("To jest test.")); + assertEquals(1, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WordRepeatRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WordRepeatRuleTest.java new file mode 100644 index 0000000..01f9007 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/WordRepeatRuleTest.java @@ -0,0 +1,81 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.de.GermanWordRepeatRule; + +/** + * + * @author Daniel Naber + */ +public class WordRepeatRuleTest extends TestCase { + + public void testRule() throws IOException { + WordRepeatRule rule = new WordRepeatRule(TestTools.getEnglishMessages(), Language.ENGLISH); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence...")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("This this is a test sentence.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence sentence.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("This is is a a test sentence sentence.")); + assertEquals(3, matches.length); + } + + public void testRuleGerman() throws IOException { + WordRepeatRule rule = new GermanWordRepeatRule(TestTools.getEnglishMessages(), Language.GERMAN); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.GERMAN); + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("Das sind die Sätze, die die testen sollen.")); + assertEquals(0, matches.length); + matches = 
rule.match(langTool.getAnalyzedSentence("Sätze, die die testen.")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("Die die Sätze zum testen.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Und die die Sätze zum testen.")); + assertEquals(1, matches.length); + } + + public void testRulePolish() throws IOException { + WordRepeatRule rule = new WordRepeatRule(TestTools.getEnglishMessages(), Language.POLISH); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.POLISH); + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("To jest zdanie.")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("To jest jest zdanie.")); + assertEquals(1, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/BitextPatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/BitextPatternRuleTest.java new file mode 100644 index 0000000..01e4f6a --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/BitextPatternRuleTest.java @@ -0,0 +1,288 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.bitext; + +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.List; +import java.util.Set; + +import de.danielnaber.languagetool.AnalyzedSentence; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.bitext.StringPair; +import de.danielnaber.languagetool.rules.Rule; +import de.danielnaber.languagetool.rules.RuleMatch; +import de.danielnaber.languagetool.rules.patterns.PatternRule; +import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRule; +import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRuleLoader; +import junit.framework.TestCase; + +public class BitextPatternRuleTest extends TestCase { + + public void testBitextRulesFromXML() throws IOException { + testBitextRulesFromXML(null, false); + } + + private void testBitextRulesFromXML(final Set<Language> ignoredLanguages, + final boolean verbose) throws IOException { + for (final Language lang : Language.LANGUAGES) { + if (ignoredLanguages != null && ignoredLanguages.contains(lang)) { + if (verbose) { + System.out.println("Ignoring tests for " + lang.getName()); + } + continue; + } + final BitextPatternRuleLoader ruleLoader = new BitextPatternRuleLoader(); + final String name = "/" + lang.getShortName() + "/bitext.xml"; + final InputStream is = JLanguageTool.getDataBroker().getFromRulesDirAsStream(name); + if (is != null) { + if (verbose) { + System.out.println("Running tests for " + lang.getName() + "..."); + } + final JLanguageTool languageTool = new JLanguageTool(lang); + final List<BitextPatternRule> rules = ruleLoader.getRules(is, name); + testBitextRulesFromXML(rules, 
languageTool, Language.POLISH); + } + } + } + + private void testBitextRulesFromXML(final List<BitextPatternRule> rules, + final JLanguageTool languageTool, final Language lang) throws IOException { + final HashMap<String, PatternRule> complexRules = new HashMap<String, PatternRule>(); + for (final BitextPatternRule rule : rules) { + testBitextRule(rule, lang, languageTool); + } + /* + if (!complexRules.isEmpty()) { + final Set<String> set = complexRules.keySet(); + final List<PatternRule> badRules = new ArrayList<PatternRule>(); + final Iterator<String> iter = set.iterator(); + while (iter.hasNext()) { + final PatternRule badRule = complexRules.get(iter.next()); + if (badRule != null) { + badRule.notComplexPhrase(); + badRule + .setMessage("The rule contains a phrase that never matched any incorrect example."); + badRules.add(badRule); + } + } + if (!badRules.isEmpty()) { + testGrammarRulesFromXML(badRules, languageTool, lang); + } + } + */ + } + + private String cleanSentence(String str) { + return cleanXML(str.replaceAll("[\\n\\t]+", "")); + } + + private void testMarker(int expectedMatchStart, + int expectedMatchEnd, Rule rule, Language lang) { + if (expectedMatchStart == -1 || expectedMatchEnd == -1) { + fail(lang + + ": No error position markup ('<marker>...</marker>') in bad example in rule " + + rule); + } + + } + + private void testBadSentence(final String origBadSentence, + final List<String> suggestedCorrection, final int expectedMatchStart, + final int expectedMatchEnd, final PatternRule rule, + final Language lang, + final JLanguageTool languageTool) throws IOException { + final String badSentence = cleanXML(origBadSentence); + assertTrue(badSentence.trim().length() > 0); + RuleMatch[] matches = getMatches(rule, badSentence, languageTool); +// if (!rule.isWithComplexPhrase()) { + assertTrue(lang + ": Did expect one error in: \"" + badSentence + + "\" (Rule: " + rule + "), got " + matches.length + + ". 
Additional info:" + rule.getMessage(), matches.length == 1); + assertEquals(lang + + ": Incorrect match position markup (start) for rule " + rule, + expectedMatchStart, matches[0].getFromPos()); + assertEquals(lang + + ": Incorrect match position markup (end) for rule " + rule, + expectedMatchEnd, matches[0].getToPos()); + // make sure suggestion is what we expect it to be + if (suggestedCorrection != null && suggestedCorrection.size() > 0) { + assertTrue("You specified a correction but your message has no suggestions in rule " + rule, + rule.getMessage().contains("<suggestion>") + ); + assertTrue(lang + ": Incorrect suggestions: " + + suggestedCorrection.toString() + " != " + + matches[0].getSuggestedReplacements() + " for rule " + rule, + suggestedCorrection.equals(matches[0] + .getSuggestedReplacements())); +// } + // make sure the suggested correction doesn't produce an error: + if (matches[0].getSuggestedReplacements().size() > 0) { + final int fromPos = matches[0].getFromPos(); + final int toPos = matches[0].getToPos(); + for (final String repl : matches[0].getSuggestedReplacements()) { + final String fixedSentence = badSentence.substring(0, fromPos) + + repl + badSentence.substring(toPos); + matches = getMatches(rule, fixedSentence, languageTool); + if (matches.length > 0) { + fail("Incorrect input:\n" + + " " + badSentence + + "\nCorrected sentence:\n" + + " " + fixedSentence + + "\nBy Rule:\n" + + " " + rule + + "\nThe correction triggered an error itself:\n" + + " " + matches[0] + "\n"); + } + } + } + } + } + + private void testBitextRule(final BitextPatternRule rule, final Language lang, + final JLanguageTool languageTool) throws IOException { + JLanguageTool srcTool = new JLanguageTool(rule.getSourceLang()); + //int noSuggestionCount = 0; + final List<StringPair> goodSentences = rule.getCorrectBitextExamples(); + for (StringPair goodSentence : goodSentences) { + assertTrue(cleanSentence(goodSentence.getSource()).trim().length() > 0); + 
assertTrue(cleanSentence(goodSentence.getTarget()).trim().length() > 0); + assertFalse(lang + ": Did not expect error in: " + goodSentence + + " (Rule: " + rule + ")", + match(rule, goodSentence.getSource(), goodSentence.getTarget(), + srcTool, languageTool)); + } + final List<IncorrectBitextExample> badSentences = rule.getIncorrectBitextExamples(); + for (IncorrectBitextExample origBadExample : badSentences) { + // enable indentation use + String origBadSrcSentence = origBadExample.getExample().getSource().replaceAll( + "[\\n\\t]+", ""); + String origBadTrgSentence = origBadExample.getExample().getTarget().replaceAll( + "[\\n\\t]+", ""); + final List<String> suggestedCorrection = origBadExample + .getCorrections(); + final int expectedSrcMatchStart = origBadSrcSentence.indexOf("<marker>"); + final int expectedSrcMatchEnd = origBadSrcSentence.indexOf("</marker>") + - "<marker>".length(); + testMarker(expectedSrcMatchStart, expectedSrcMatchEnd, rule, lang); + final int expectedTrgMatchStart = origBadTrgSentence.indexOf("<marker>"); + final int expectedTrgMatchEnd = origBadTrgSentence.indexOf("</marker>") + - "<marker>".length(); + testMarker(expectedTrgMatchStart, expectedTrgMatchEnd, rule, lang); + + testBadSentence(origBadSrcSentence, + suggestedCorrection, expectedSrcMatchStart, + expectedSrcMatchEnd, rule.getSrcRule(), + lang, + srcTool); + + testBadSentence(origBadTrgSentence, + suggestedCorrection, expectedTrgMatchStart, + expectedTrgMatchEnd, rule.getTrgRule(), + lang, + languageTool); + + } + + /* } else { // for multiple rules created with complex phrases + + matches = getMatches(rule, badSentence, languageTool); + if (matches.length == 0 + && !complexRules.containsKey(rule.getId() + badSentence)) { + complexRules.put(rule.getId() + badSentence, rule); + } + + if (matches.length != 0) { + complexRules.put(rule.getId() + badSentence, null); + assertTrue(lang + ": Did expect one error in: \"" + badSentence + + "\" (Rule: " + rule + "), got " + matches.length, 
+ matches.length == 1); + assertEquals(lang + + ": Incorrect match position markup (start) for rule " + rule, + expectedMatchStart, matches[0].getFromPos()); + assertEquals(lang + + ": Incorrect match position markup (end) for rule " + rule, + expectedMatchEnd, matches[0].getToPos()); + // make sure suggestion is what we expect it to be + if (suggestedCorrection != null && suggestedCorrection.size() > 0) { + assertTrue( + lang + ": Incorrect suggestions: " + + suggestedCorrection.toString() + " != " + + matches[0].getSuggestedReplacements() + " for rule " + + rule, suggestedCorrection.equals(matches[0] + .getSuggestedReplacements())); + } + // make sure the suggested correction doesn't produce an error: + if (matches[0].getSuggestedReplacements().size() > 0) { + final int fromPos = matches[0].getFromPos(); + final int toPos = matches[0].getToPos(); + for (final String repl : matches[0].getSuggestedReplacements()) { + final String fixedSentence = badSentence.substring(0, fromPos) + + repl + badSentence.substring(toPos); + matches = getMatches(rule, fixedSentence, languageTool); + assertEquals("Corrected sentence for rule " + rule + + " triggered error: " + fixedSentence, 0, matches.length); + } + } else { + noSuggestionCount++; + } + } */ + } + + + + + protected String cleanXML(final String str) { + return str.replaceAll("<([^<].*?)>", ""); + } + + private boolean match(final BitextPatternRule rule, final String src, final String trg, + final JLanguageTool srcLanguageTool, + final JLanguageTool trgLanguageTool) throws IOException { + final AnalyzedSentence srcText = srcLanguageTool.getAnalyzedSentence(src); + final AnalyzedSentence trgText = trgLanguageTool.getAnalyzedSentence(trg); + final RuleMatch[] matches = rule.match(srcText, trgText); + return matches.length > 0; + } + + + private RuleMatch[] getMatches(final Rule rule, final String sentence, + final JLanguageTool languageTool) throws IOException { + final AnalyzedSentence text = 
languageTool.getAnalyzedSentence(sentence); + final RuleMatch[] matches = rule.match(text); + return matches; + } + + /** + * Test XML patterns, as a help for people developing rules that are not + * programmers. + */ + public static void main(final String[] args) throws IOException { + final BitextPatternRuleTest prt = new BitextPatternRuleTest(); + System.out.println("Running XML bitext pattern tests..."); + prt.testBitextRulesFromXML(); + System.out.println("Tests successful."); + } + + + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/DifferentLengthRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/DifferentLengthRuleTest.java new file mode 100644 index 0000000..ab6cfc3 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/DifferentLengthRuleTest.java @@ -0,0 +1,56 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.bitext; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch; +import junit.framework.TestCase; + +public class DifferentLengthRuleTest extends TestCase { + + public void testRule() throws IOException { + DifferentLengthRule rule = new DifferentLengthRule(); + //(TestTools.getEnglishMessages(), Language.ENGLISH); + RuleMatch[] matches; + JLanguageTool trgLangTool = new JLanguageTool(Language.ENGLISH); + JLanguageTool srcLangTool = new JLanguageTool(Language.POLISH); + rule.setSourceLang(Language.ENGLISH); + // correct sentences: + matches = rule.match( + srcLangTool.getAnalyzedSentence("This is a test sentence."), + trgLangTool.getAnalyzedSentence("To zdanie testowe.")); + assertEquals(0, matches.length); + + matches = rule.match( + srcLangTool.getAnalyzedSentence("Click this button."), + trgLangTool.getAnalyzedSentence("Kliknij ten przycisk.")); + assertEquals(0, matches.length); + + // incorrect sentences: + matches = rule.match( + srcLangTool.getAnalyzedSentence("Open a file, and check if it is corrupt."), + trgLangTool.getAnalyzedSentence("Otwórz plik.")); + assertEquals(1, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/FalseFriendsAsBitextLoaderTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/FalseFriendsAsBitextLoaderTest.java new file mode 100644 index 0000000..08a104f --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/FalseFriendsAsBitextLoaderTest.java @@ -0,0 +1,91 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 
Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */
package de.danielnaber.languagetool.rules.bitext;

import java.io.IOException;
import java.util.List;
import java.util.ArrayList;

import javax.xml.parsers.ParserConfigurationException;

import junit.framework.TestCase;

import org.xml.sax.SAXException;

import de.danielnaber.languagetool.AnalyzedSentence;
import de.danielnaber.languagetool.JLanguageTool;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.RuleMatch;
import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRule;
import de.danielnaber.languagetool.rules.patterns.bitext.FalseFriendsAsBitextLoader;

/**
 * Checks the bitext rules that {@link FalseFriendsAsBitextLoader} builds
 * from <code>false-friends.xml</code> for the English/Polish pair.
 *
 * @author Marcin Miłkowski
 */
public class FalseFriendsAsBitextLoaderTest extends TestCase {

  /**
   * Three English/Polish sentence pairs must each produce one match and
   * one pair must produce none.
   */
  public void testHintsForPolishTranslators() throws IOException, ParserConfigurationException, SAXException {
    final JLanguageTool englishTool = new JLanguageTool(Language.ENGLISH, Language.POLISH);
    final JLanguageTool polishTool = new JLanguageTool(Language.POLISH);

    final FalseFriendsAsBitextLoader loader = new FalseFriendsAsBitextLoader();
    final String fileName = "/false-friends.xml";
    final String rulesPath = JLanguageTool.getDataBroker().getRulesDir() + fileName;
    final List<BitextPatternRule> falseFriendRules =
        loader.getFalseFriendsAsBitext(rulesPath, Language.ENGLISH, Language.POLISH);

    assertErrors(1, falseFriendRules, "This is an absurd.", "To absurd.", englishTool, polishTool);
    assertErrors(1, falseFriendRules, "I have to speak to my advocate.", "Muszę porozmawiać z adwokatem.", englishTool, polishTool);
    assertErrors(1, falseFriendRules, "This is not actual.", "To nie jest aktualne.", englishTool, polishTool);
    assertErrors(0, falseFriendRules, "This is not actual.", "To nie jest rzeczywiste.", englishTool, polishTool);
  }

  /**
   * Asserts that running all <code>rules</code> on the sentence pair yields
   * exactly <code>errorCount</code> matches in total.
   */
  private void assertErrors(int errorCount,
      final List<BitextPatternRule> rules,
      final String src, final String trg, JLanguageTool srcTool, JLanguageTool trgTool) throws IOException {
    final List<RuleMatch> found = check(rules, src, trg, srcTool, trgTool);
    assertEquals(errorCount, found.size());
  }

  /** Runs every rule on the sentence pair and gathers all matches in rule order. */
  private List<RuleMatch> check(final List<BitextPatternRule> bRules,
      final String src, final String trg,
      final JLanguageTool srcTool, final JLanguageTool trgTool) throws IOException {
    final List<RuleMatch> collected = new ArrayList<RuleMatch>();
    for (final BitextPatternRule current : bRules) {
      final RuleMatch[] ruleMatches = match(current, src, trg, srcTool, trgTool);
      if (ruleMatches == null) {
        continue;
      }
      for (int i = 0; i < ruleMatches.length; i++) {
        collected.add(ruleMatches[i]);
      }
    }
    return collected;
  }

  /** Analyzes both sentences with their own tool and applies a single rule to the pair. */
  private RuleMatch[] match(final BitextPatternRule rule, final String src, final String trg,
      final JLanguageTool srcLanguageTool,
      final JLanguageTool trgLanguageTool) throws IOException {
    final AnalyzedSentence analyzedSource = srcLanguageTool.getAnalyzedSentence(src);
    final AnalyzedSentence analyzedTarget = trgLanguageTool.getAnalyzedSentence(trg);
    return rule.match(analyzedSource, analyzedTarget);
  }
}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/SameTranslationRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/SameTranslationRuleTest.java new file mode 100644 index 0000000..6809034 --- /dev/null +++ 
b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/bitext/SameTranslationRuleTest.java @@ -0,0 +1,57 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */

package de.danielnaber.languagetool.rules.bitext;

import java.io.IOException;

import de.danielnaber.languagetool.JLanguageTool;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.RuleMatch;
import junit.framework.TestCase;

/**
 * Tests {@link SameTranslationRule} with an English source and a French
 * target tool.
 */
public class SameTranslationRuleTest extends TestCase {

  /**
   * A real translation and an unchanged proper name give no match; a
   * sentence copied verbatim into the translation gives exactly one.
   */
  public void testRule() throws IOException {
    final SameTranslationRule rule = new SameTranslationRule();
    final JLanguageTool frenchTool = new JLanguageTool(Language.FRENCH);
    final JLanguageTool englishTool = new JLanguageTool(Language.ENGLISH);
    rule.setSourceLang(Language.ENGLISH);

    // correct sentences:
    final RuleMatch[] translated = rule.match(
        englishTool.getAnalyzedSentence("This is a test sentence."),
        frenchTool.getAnalyzedSentence("C'est la vie !"));
    assertEquals(0, translated.length);

    //tricky: proper names should be left as is!
    final RuleMatch[] properName = rule.match(
        englishTool.getAnalyzedSentence("Elvis Presley"),
        frenchTool.getAnalyzedSentence("Elvis Presley"));
    assertEquals(0, properName.length);

    // incorrect sentences:
    final RuleMatch[] untranslated = rule.match(
        englishTool.getAnalyzedSentence("This this is a test sentence."),
        frenchTool.getAnalyzedSentence("This this is a test sentence."));
    assertEquals(1, untranslated.length);
  }

}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRuleTest.java new file mode 100644 index 0000000..5383ea3 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRuleTest.java @@ -0,0 +1,80 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.ca; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * + * Simple tests for rules/ca/AccentuacioSimpleReplaceRule class + * + * @author Ionuț Păduraru + */ +public class AccentuacioReplaceRuleTest extends TestCase { + + private AccentuacioReplaceRule rule; + private JLanguageTool langTool; + + protected void setUp() throws Exception { + super.setUp(); + rule = new AccentuacioReplaceRule(TestTools.getMessages("ca")); + langTool = new JLanguageTool(Language.CATALAN); + } + + public void testRule() throws IOException { + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Tot està bé.")).length); + + // incorrect sentences: + + // at the beginning of a sentence (Romanian replace rule is case-sensitive) + checkSimpleReplaceRule("Pneumonia vírica.", "Pneumònia"); + // inside sentence + checkSimpleReplaceRule("Supercopa d'Europa de futbòl.", "futbol"); + } + + /** + * Check if a specific replace rule applies. + * + * @param sentence + * the sentence containing the incorrect/misspeled word. + * @param word + * the word that is correct (the suggested replacement). 
+ * @throws IOException + */ + private void checkSimpleReplaceRule(String sentence, String word) + throws IOException { + RuleMatch[] matches; + matches = rule.match(langTool.getAnalyzedSentence(sentence)); + assertEquals("Invalid matches.length while checking sentence: " + + sentence, 1, matches.length); + assertEquals("Invalid replacement count while checking sentence: " + + sentence, 1, matches[0].getSuggestedReplacements().size()); + assertEquals("Invalid suggested replacement while checking sentence: " + + sentence, word, matches[0].getSuggestedReplacements().get(0)); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRuleTest.java new file mode 100644 index 0000000..d77b935 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRuleTest.java @@ -0,0 +1,80 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.ca; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * + * Simple tests for rules/ca/SimpleReplaceRule class + * + * @author Ionuț Păduraru + */ +public class CastellanismesReplaceRuleTest extends TestCase { + + private CastellanismesReplaceRule rule; + private JLanguageTool langTool; + + protected void setUp() throws Exception { + super.setUp(); + rule = new CastellanismesReplaceRule(TestTools.getMessages("ca")); + langTool = new JLanguageTool(Language.CATALAN); + } + + public void testRule() throws IOException { + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Tot està bé.")).length); + + // incorrect sentences: + + // at the beginning of a sentence (Romanian replace rule is case-sensitive) + checkSimpleReplaceRule("Después de la mort de Lenin.", "Després"); + // inside sentence + checkSimpleReplaceRule("Un any después.", "després"); + } + + /** + * Check if a specific replace rule applies. + * + * @param sentence + * the sentence containing the incorrect/misspeled word. + * @param word + * the word that is correct (the suggested replacement). 
+ * @throws IOException + */ + private void checkSimpleReplaceRule(String sentence, String word) + throws IOException { + RuleMatch[] matches; + matches = rule.match(langTool.getAnalyzedSentence(sentence)); + assertEquals("Invalid matches.length while checking sentence: " + + sentence, 1, matches.length); + assertEquals("Invalid replacement count wile checking sentence: " + + sentence, 1, matches[0].getSuggestedReplacements().size()); + assertEquals("Invalid suggested replacement while checking sentence: " + + sentence, word, matches[0].getSuggestedReplacements().get(0)); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/AgreementRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/AgreementRuleTest.java new file mode 100644 index 0000000..a396a4d --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/AgreementRuleTest.java @@ -0,0 +1,190 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.de; + +import java.io.IOException; +import java.util.List; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * @author Daniel Naber + */ +public class AgreementRuleTest extends TestCase { + + private AgreementRule rule; + private JLanguageTool langTool; + + public void setUp() throws IOException { + rule = new AgreementRule(null); + langTool = new JLanguageTool(Language.GERMAN); + } + + public void testDetNounRule() throws IOException { + + /* debugging: + RuleMatch[] rm = rule.match(langTool.getAnalyzedSentence("Wer für die Kosten")); + System.err.println(rm[0]); + if (true) + return; + */ + + // correct sentences: + assertGood("So ist es in den USA."); + assertGood("Das ist der Tisch."); + assertGood("Das ist das Haus."); + assertGood("Das ist die Frau."); + assertGood("Das ist das Auto der Frau."); + assertGood("Das gehört dem Mann."); + assertGood("Das Auto des Mannes."); + assertGood("Das interessiert den Mann."); + assertGood("Das interessiert die Männer."); + assertGood("Das Auto von einem Mann."); + assertGood("Das Auto eines Mannes."); + assertGood("Des großen Mannes."); + + assertGood("Das Dach von meinem Auto."); + assertGood("Das Dach von meinen Autos."); + + assertGood("Das Dach meines Autos."); + assertGood("Das Dach meiner Autos."); + + assertGood("Das Dach meines großen Autos."); + assertGood("Das Dach meiner großen Autos."); + + assertGood("Das Wahlrecht, das Frauen damals zugesprochen bekamen."); + assertGood("Es war Karl, dessen Leiche Donnerstag gefunden wurde."); + + assertGood("Erst recht ich Arbeiter."); + assertGood("Erst recht 
wir Arbeiter."); + assertGood("Erst recht wir fleißigen Arbeiter."); + + assertGood("Dann lud er Freunde ein."); + assertGood("Dann lud sie Freunde ein."); + assertGood("Aller Kommunikation liegt dies zugrunde."); + assertGood("Pragmatisch wählt man solche Formeln als Axiome."); + assertGood("Der eine Polizist rief dem anderen zu..."); + assertGood("Das eine Kind rief dem anderen zu..."); + assertGood("Er wollte seine Interessen wahrnehmen."); + + assertGood("... wo Krieg den Unschuldigen Leid und Tod bringt."); + assertGood("Der Abschuss eines Papageien."); + + // relative clauses: + assertGood("Das Recht, das Frauen eingeräumt wird."); + assertGood("Der Mann, in dem quadratische Fische schwammen."); + assertGood("Gutenberg, der quadratische Mann."); + // TODO: not detected, because "die" is considered a relative pronoun: + //assertBad("Gutenberg, die Genie."); + + // some of these used to cause false alarms: + assertGood("Das Münchener Fest."); + assertGood("Das Münchner Fest."); + assertGood("Die Planung des Münchener Festes."); + assertGood("Das Berliner Wetter."); + assertGood("Den Berliner Arbeitern ist das egal."); + assertGood("Das Haus des Berliner Arbeiters."); + assertGood("Es gehört dem Berliner Arbeiter."); + assertGood("Das Stuttgarter Auto."); + assertGood("Das Bielefelder Radio."); + assertGood("Das Gütersloher Radio."); + + // incorrect sentences: + assertBad("Es sind die Tisch."); + assertBad("Es sind das Tisch."); + assertBad("Es sind die Haus."); + assertBad("Es sind der Haus."); + assertBad("Es sind das Frau."); + assertBad("Das Auto des Mann."); + assertBad("Das interessiert das Mann."); + assertBad("Das interessiert die Mann."); + assertBad("Das Auto ein Mannes."); + assertBad("Das Auto einem Mannes."); + assertBad("Das Auto einer Mannes."); + assertBad("Das Auto einen Mannes."); + + assertBad("Des großer Mannes."); + + assertBad("Das Dach von meine Auto."); + assertBad("Das Dach von meinen Auto."); + + assertBad("Das Dach mein Autos."); + 
assertBad("Das Dach meinem Autos."); + + assertBad("Das Dach meinem großen Autos."); + assertBad("Das Dach mein großen Autos."); + + assertBad("Erst recht wir fleißiges Arbeiter."); + + // TODO: not yet detected: + //assertBad("Erst recht ich fleißiges Arbeiter."); + //assertBad("Das Dach meine großen Autos."); + //assertBad("Das Dach meinen großen Autos."); + //assertBad("Das Dach meine Autos."); + //assertBad("Es ist das Haus dem Mann."); + //assertBad("Das interessiert der Männer."); + //assertBad("Das interessiert der Mann."); + //assertBad("Das gehört den Mann."); + //assertBad("Es sind der Frau."); + } + + public void testRegression() throws IOException { + JLanguageTool gramCheckerEngine = new JLanguageTool(Language.GERMAN); + gramCheckerEngine.activateDefaultPatternRules(); + // used to be not detected > 1.0.1: + String str = "Und so.\r\nDie Bier."; + List<RuleMatch> matches = gramCheckerEngine.check(str); + assertEquals(1, matches.size()); + } + + public void testDetAdjNounRule() throws IOException { + // correct sentences: + assertGood("Das ist der riesige Tisch."); + assertGood("Der riesige Tisch ist groß."); + assertGood("Die Kanten der der riesigen Tische."); + assertGood("Den riesigen Tisch mag er."); + assertGood("Es mag den riesigen Tisch."); + assertGood("Die Kante des riesigen Tisches."); + assertGood("Dem riesigen Tisch fehlt was."); + assertGood("Die riesigen Tische sind groß."); + assertGood("Der riesigen Tische wegen."); + // TODO: incorrectly detected as incorrect: + // Dann hat das natürlich Nachteile. 
+ + // incorrect sentences: + assertBad("Es sind die riesigen Tisch."); + //assertBad("Dort, die riesigen Tischs!"); // TODO: error not detected because of comma + assertBad("Als die riesigen Tischs kamen."); + assertBad("Als die riesigen Tisches kamen."); + // TODO: not yet detected: + //assertBad("Der riesigen Tisch und so."); + } + + private void assertGood(String s) throws IOException { + assertEquals(0, rule.match(langTool.getAnalyzedSentence(s)).length); + } + + private void assertBad(String s) throws IOException { + assertEquals(1, rule.match(langTool.getAnalyzedSentence(s)).length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CaseRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CaseRuleTest.java new file mode 100644 index 0000000..bbabdc1 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CaseRuleTest.java @@ -0,0 +1,116 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.de; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; + +/** + * @author Daniel Naber + */ +public class CaseRuleTest extends TestCase { + + public void testRule() throws IOException { + CaseRule rule = new CaseRule(null); + JLanguageTool langTool = new JLanguageTool(Language.GERMAN); + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Ein einfacher Satz zum Testen.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das Laufen fällt mir leicht.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das Winseln stört.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das schlägt nicht so zu Buche.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Dirk Hetzel ist ein Name.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Sein Verhalten war okay.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. \"Ein Zitat.\"")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. 'Ein Zitat.'")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. «Ein Zitat.»")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. »Ein Zitat.«")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. 
(Noch einer.)")).length); + // works only thanks to addex.txt: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Der Nachfahre.")).length); + // both can be correct: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz, \"Ein Zitat.\"")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz, \"ein Zitat.\"")).length); + // Exception 'Le': + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Schon Le Monde schrieb das.")).length); + // unknown word: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("In Blubberdorf macht man das so.")).length); + + // sentences that used to trigger an error because of incorrect compound tokenization: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das sind Euroscheine.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("John Stallman isst.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist die neue Gesellschafterin hier.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist die neue Dienerin hier.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist die neue Geigerin hier.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die ersten Gespanne erreichen Köln.")).length); + + // used to trigger error because of wrong POS tagging: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die Schlinge zieht sich zu.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die Schlingen ziehen sich zu.")).length); + + // TODO: nach dem Doppelpunkt wird derzeit nicht auf groß/klein getestet: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist es: kein Satz.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das ist es: Kein Satz.")).length); + + // incorrect sentences: + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Ein Einfacher Satz zum Testen.")).length); + assertEquals(1, 
rule.match(langTool.getAnalyzedSentence("Das Winseln Stört.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Sein verhalten war okay.")).length); + } + + public void testSubstantivierteVerben() throws IOException { + CaseRule rule = new CaseRule(null); + JLanguageTool langTool = new JLanguageTool(Language.GERMAN); + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das fahrende Auto.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das Fahren ist einfach.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Denn das Fahren ist einfach.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das können wir so machen.")).length); + // incorrect sentences: + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das fahren ist einfach.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das fahren ist einfach.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das laufen ist einfach.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das essen ist einfach.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Denn das gehen ist einfach.")).length); + } + + public void testPhraseExceptions() throws IOException { + CaseRule rule = new CaseRule(null); + JLanguageTool langTool = new JLanguageTool(Language.GERMAN); + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne Wenn und Aber.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("ohne Wenn und Aber")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne Wenn und Aber bla blubb.")).length); + // as long as phrase exception isn't complete, there's no error: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne wenn")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Das gilt ohne wenn 
und")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("wenn und aber")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("und aber")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("aber")).length); + // incorrect sentences: + // error not found here as it's in the XML rules: + //assertEquals(1, rule.match(langTool.getAnalyzedSentence("Das gilt ohne wenn und aber.")).length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CompoundRuleTest.java new file mode 100644 index 0000000..caabdce --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/CompoundRuleTest.java @@ -0,0 +1,88 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.de; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.CompoundRuleTestAbs; + +/** + * @author Daniel Naber + */ +public class CompoundRuleTest extends CompoundRuleTestAbs { + + protected void setUp() throws Exception { + super.setUp(); + langTool = new JLanguageTool(Language.GERMAN); + rule = new CompoundRule(null); + } + + public void testRule() throws IOException { + // correct sentences: + check(0, "Eine tolle CD-ROM"); + check(0, "Eine tolle CD-ROM."); + check(0, "Ein toller CD-ROM-Test."); + check(0, "Systemadministrator"); + check(0, "System-Administrator"); + check(0, "Eine Million Dollar"); + check(0, "Das System des Administrators"); + check(0, "Nur im Stand-by-Betrieb"); + check(0, "Start, Ziel, Sieg"); + check(0, "Roll-on-roll-off-Schiff"); + // incorrect sentences: + check(1, "System Administrator", new String[]{"System-Administrator", "Systemadministrator"}); + check(1, "bla bla bla bla bla System Administrator bla bla bla bla bla"); + check(1, "System Administrator blubb"); + check(1, "Der System Administrator"); + check(1, "Der dumme System Administrator"); + check(1, "CD ROM", new String[]{"CD-ROM"}); + check(1, "Nur im Stand by Betrieb", new String[]{"Stand-by-Betrieb"}); + check(1, "Ein echter Start Ziel Sieg", new String[]{"Start-Ziel-Sieg"}); + check(1, "Ein echter Start Ziel Sieg."); + check(1, "Ein Start Ziel Sieg"); + check(1, "Start Ziel Sieg"); + check(1, "Start Ziel Sieg!"); + check(2, "Der dumme System Administrator legt die CD ROM"); + check(2, "Der dumme System Administrator legt die CD ROM."); + check(2, "Der dumme System Administrator legt die CD 
ROM ein blah"); + check(2, "System Administrator CD ROM"); + //FIXME: suggestions / longest match + //check(1, "Roll on roll off Schiff", new String[]{"Roll-on-roll-off-Schiff"}); + check(1, "Spin off"); + // no hyphen suggestion for some words: + check(1, "Das ist Haar sträubend", new String[]{"Haarsträubend"}); + // Only hyphen suggestion for some words: + check(1, "Reality TV", new String[]{"Reality-TV"}); + check(1, "Spin off", new String[]{"Spin-off"}); + // also accept incorrect upper/lowercase spelling: + check(1, "Spin Off", new String[]{"Spin-Off"}); + check(1, "CW Wert", new String[]{"CW-Wert"}); + // also detect an error if only some of the hyphens are missing: + check(1, "Roll-on-roll-off Schiff", new String[]{"Roll-on-roll-off-Schiff"}); + check(1, "E-Mail Adressen", new String[]{"E-Mail-Adressen"}); + // first part is a single character: + check(0, "x-mal"); + check(1, "x mal", new String[]{"x-mal"}); + check(0, "y-Achse"); + check(1, "y Achse", new String[]{"y-Achse"}); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/DashRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/DashRuleTest.java new file mode 100644 index 0000000..5dacdd9 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/DashRuleTest.java @@ -0,0 +1,53 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.de; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; + +/** + * @author Daniel Naber + */ +public class DashRuleTest extends TestCase { + + public void testRule() throws IOException { + DashRule rule = new DashRule(null); + JLanguageTool langTool = new JLanguageTool(Language.GERMAN); + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten-Erhöhung kam dann doch.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diätenerhöhung kam dann doch.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten-Erhöhungs-Manie kam dann doch.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten- und Gehaltserhöhung kam dann doch.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten- sowie Gehaltserhöhung kam dann doch.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Die große Diäten- oder Gehaltserhöhung kam dann doch.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Erst so - Karl-Heinz dann blah.")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Erst so -- Karl-Heinz aber...")).length); + + // incorrect sentences: + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten- Erhöhung kam dann doch.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten- Erhöhung kam dann doch.")).length); + assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten-Erhöhungs- Manie kam dann doch.")).length); + 
assertEquals(1, rule.match(langTool.getAnalyzedSentence("Die große Diäten- Erhöhungs-Manie kam dann doch.")).length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WiederVsWiderRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WiederVsWiderRuleTest.java new file mode 100644 index 0000000..4482771 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WiederVsWiderRuleTest.java @@ -0,0 +1,49 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.de; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +
/**
 * Exercises {@link WiederVsWiderRule} with sentences using "wieder" and "wider".
 *
 * @author Daniel Naber
 */
public class WiederVsWiderRuleTest extends TestCase {

  /** Accepted sentences must yield no match, erroneous ones exactly one. */
  public void testRule() throws IOException {
    final WiederVsWiderRule checkedRule = new WiederVsWiderRule(null);
    final JLanguageTool tool = new JLanguageTool(Language.GERMAN);
    // correct sentences:
    assertEquals(0, countMatches(checkedRule, tool, "Das spiegelt wider, wie es wieder läuft."));
    assertEquals(0, countMatches(checkedRule, tool, "Das spiegelt die Situation gut wider."));
    assertEquals(0, countMatches(checkedRule, tool, "Das spiegelt die Situation."));
    assertEquals(0, countMatches(checkedRule, tool, "Immer wieder spiegelt das die Situation."));
    assertEquals(0, countMatches(checkedRule, tool, "Immer wieder spiegelt das die Situation wider."));
    // known to match although sentence is okay:
    //assertEquals(0, rule.match(langTool.getAnalyzedText("Das spiegelt wieder wider, wie es läuft.")).length);
    // errors:
    assertEquals(1, countMatches(checkedRule, tool, "Das spiegelt wieder, wie es wieder läuft."));
    assertEquals(1, countMatches(checkedRule, tool, "Das spiegelt die Situation gut wieder."));
    assertEquals(1, countMatches(checkedRule, tool, "Immer wieder spiegelt das die Situation wieder."));
  }

  /** Analyzes {@code sentence} with {@code tool} and returns how many matches {@code rule} reports. */
  private static int countMatches(WiederVsWiderRule rule, JLanguageTool tool, String sentence)
      throws IOException {
    return rule.match(tool.getAnalyzedSentence(sentence)).length;
  }

}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WordCoherencyRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WordCoherencyRuleTest.java new file mode
100644 index 0000000..c3465f4 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/de/WordCoherencyRuleTest.java @@ -0,0 +1,87 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.de; + +import java.io.IOException; +import java.util.List; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch; +
/**
 * Exercises {@link WordCoherencyRule} with the spelling variants
 * "aufwendig" / "aufwändig", both sentence by sentence and on complete texts.
 *
 * @author Daniel Naber
 */
public class WordCoherencyRuleTest extends TestCase {

  /** Single sentences; the rule is reset between sentences. */
  public void testRule() throws IOException {
    final WordCoherencyRule coherencyRule = new WordCoherencyRule(null);
    final JLanguageTool tool = new JLanguageTool(Language.GERMAN);
    // correct sentences:
    assertEquals(0, countMatches(coherencyRule, tool, "Das ist aufwendig, aber nicht zu aufwendig."));
    // as WordCoherencyRule keeps its state to check more than one sentence
    // it is reset before every further sentence:
    coherencyRule.reset();
    assertEquals(0, countMatches(coherencyRule, tool, "Das ist aufwändig, aber nicht zu aufwändig."));
    // errors:
    coherencyRule.reset();
    assertEquals(1, countMatches(coherencyRule, tool, "Das ist aufwendig, aber nicht zu aufwändig."));
    coherencyRule.reset();
    assertEquals(1, countMatches(coherencyRule, tool, "Das ist aufwändig, aber nicht zu aufwendig."));
  }

  /** Complete texts run through {@link JLanguageTool#check}. */
  public void testRuleCompleteTexts() throws IOException {
    // complete texts:
    //matches = langTool.check("Das ist aufwendig. Aber hallo. Es ist wirklich aufwendig.");
    //assertEquals(0, matches.size());
    final JLanguageTool tool = new JLanguageTool(Language.GERMAN);
    assertTextMatches(tool, 0, "Das ist aufwändig. Aber hallo. Es ist wirklich aufwändig.");

    assertTextMatches(tool, 1, "Das ist aufwendig. Aber hallo. Es ist wirklich aufwändig.");

    assertTextMatches(tool, 1, "Das ist aufwändig. Aber hallo. Es ist wirklich aufwendig.");

    // also find full forms:
    assertTextMatches(tool, 0, "Das ist aufwendig. Aber hallo. Es ist wirklich aufwendiger als...");

    assertTextMatches(tool, 1, "Das ist aufwendig. Aber hallo. Es ist wirklich aufwändiger als...");

    assertTextMatches(tool, 1, "Das ist aufwändig. Aber hallo. Es ist wirklich aufwendiger als...");

    assertTextMatches(tool, 1, "Das ist das aufwändigste. Aber hallo. Es ist wirklich aufwendiger als...");

    assertTextMatches(tool, 1, "Das ist das aufwändigste. Aber hallo. Es ist wirklich aufwendig.");

    // cross-paragraph checks
    assertTextMatches(tool, 1, "Das ist das aufwändigste.\n\nAber hallo. Es ist wirklich aufwendig.");
  }

  /** Analyzes {@code sentence} with {@code tool} and returns how many matches {@code rule} reports. */
  private static int countMatches(WordCoherencyRule rule, JLanguageTool tool, String sentence)
      throws IOException {
    return rule.match(tool.getAnalyzedSentence(sentence)).length;
  }

  /** Checks {@code text} with {@code tool} and asserts the number of matches returned. */
  private static void assertTextMatches(JLanguageTool tool, int expected, String text)
      throws IOException {
    final List<RuleMatch> found = tool.check(text);
    assertEquals(expected, found.size());
  }

}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/AvsAnRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/AvsAnRuleTest.java new file mode 100644 index 0000000..f9cfee1 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/AvsAnRuleTest.java @@ -0,0 +1,167 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.en; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * @author Daniel Naber + */ +public class AvsAnRuleTest extends TestCase { + + public void testRule() throws IOException { + AvsAnRule rule = new AvsAnRule(null); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was an hour ago.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A university is ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A one-way street ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An hour's work ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Going to an \"industry party\".")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An 8-year old boy ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An 18-year old boy ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("The A-levels are ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An NOP check ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A USA-wide license ...")); + 
assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("...asked a UN member.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("In an un-united Germany...")); + assertEquals(0, matches.length); + + //fixed false alarms: + matches = rule.match(langTool.getAnalyzedSentence("Here, a and b are supplementary angles.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("The Qur'an was translated into Polish.")); + assertEquals(0, matches.length); + + // errors: + matches = rule.match(langTool.getAnalyzedSentence("It was a hour ago.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was an sentence that's long.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was a uninteresting talk.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An university")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A unintersting ...")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was a uninteresting talk with an long sentence.")); + assertEquals(2, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A hour's work ...")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Going to a \"industry party\".")); + assertEquals(1, matches.length); + // With uppercase letters: + matches = rule.match(langTool.getAnalyzedSentence("A University")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A Europe wide something")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("then an University sdoj fixme sdoopsd")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A 8-year old boy ...")); + assertEquals(1, 
matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A 18-year old boy ...")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("...asked an UN member.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("In a un-united Germany...")); + assertEquals(1, matches.length); + + //Test on acronyms/initials: + matches = rule.match(langTool.getAnalyzedSentence("A. R.J. Turgot")); + assertEquals(0, matches.length); + + //mixed case as dictionary-based exception + matches = rule.match(langTool.getAnalyzedSentence("Anyone for an MSc?")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Anyone for a MSc?")); + assertEquals(1, matches.length); + //mixed case from general case + matches = rule.match(langTool.getAnalyzedSentence("Anyone for an XMR-based writer?")); + assertEquals(0, matches.length); + + //Test on apostrophes + matches = rule.match(langTool.getAnalyzedSentence("Its name in English is a[1] (), plural A's, As, as, or a's.")); + assertEquals(0, matches.length); + } + + public void testSuggestions() throws IOException { + AvsAnRule rule = new AvsAnRule(null); + assertEquals("a string", rule.suggestAorAn("string")); + assertEquals("a university", rule.suggestAorAn("university")); + assertEquals("an hour", rule.suggestAorAn("hour")); + assertEquals("an all-terrain", rule.suggestAorAn("all-terrain")); + assertEquals("a UNESCO", rule.suggestAorAn("UNESCO")); + + } + + public void testPositions() throws IOException { + AvsAnRule rule = new AvsAnRule(null); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + // no quotes etc.: + matches = rule.match(langTool.getAnalyzedSentence("a industry standard.")); + assertEquals(0, matches[0].getFromPos()); + assertEquals(1, matches[0].getToPos()); + + // quotes.. 
+ matches = rule.match(langTool.getAnalyzedSentence("a \"industry standard\".")); + assertEquals(0, matches[0].getFromPos()); + assertEquals(1, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("a - industry standard\".")); + assertEquals(0, matches[0].getFromPos()); + assertEquals(1, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("This is a \"industry standard\".")); + assertEquals(8, matches[0].getFromPos()); + assertEquals(9, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("\"a industry standard\".")); + assertEquals(1, matches[0].getFromPos()); + assertEquals(2, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("\"Many say this is a industry standard\".")); + assertEquals(18, matches[0].getFromPos()); + assertEquals(19, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("Like many \"an desperado\" before him, Bart headed south into Mexico.")); + assertEquals(11, matches[0].getFromPos()); + assertEquals(13, matches[0].getToPos()); + + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/CompoundRuleTest.java new file mode 100644 index 0000000..0505a05 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/CompoundRuleTest.java @@ -0,0 +1,46 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.en; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.CompoundRuleTestAbs; +
/**
 * Runs the English {@link CompoundRule} through the {@code check} helpers
 * inherited from {@link CompoundRuleTestAbs}.
 *
 * @author Daniel Naber
 */
public class CompoundRuleTest extends CompoundRuleTestAbs {

  @Override
  protected void setUp() throws Exception {
    super.setUp();
    langTool = new JLanguageTool(Language.ENGLISH);
    rule = new CompoundRule(null);
  }

  /** Hyphenated compounds pass; the unhyphenated form yields one match with a suggestion. */
  public void testRule() throws IOException {
    // correct sentences:
    final String[] accepted = {
      "The software supports case-sensitive search.",
      "He is one-year-old.",
    };
    for (String text : accepted) {
      check(0, text);
    }
    // incorrect sentences:
    final String[] expectedSuggestions = {"case-sensitive"};
    check(1, "case sensitive", expectedSuggestions);
  }

}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRuleTest.java new file mode 100644 index 0000000..c245b80 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRuleTest.java @@ -0,0 +1,151 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option)
any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.en; + +import java.io.IOException; +import java.util.List; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; +import junit.framework.TestCase; +
public class EnglishUnpairedBracketsRuleTest extends TestCase {

  /**
   * Single sentences: text with balanced or exempted brackets and quotes
   * must yield no match; the remaining sentences must yield the stated
   * number of matches.
   */
  public void testRule() throws IOException {
    final EnglishUnpairedBracketsRule bracketsRule =
        new EnglishUnpairedBracketsRule(TestTools.getEnglishMessages(), Language.ENGLISH);
    final JLanguageTool tool = new JLanguageTool(Language.ENGLISH);
    // correct sentences:
    assertMatchCount(bracketsRule, tool, 0, "(This is a test sentence).");
    assertMatchCount(bracketsRule, tool, 0, "This is a word 'test'.");
    assertMatchCount(bracketsRule, tool, 0, "This is the joint presidents' declaration.");
    assertMatchCount(bracketsRule, tool, 0, "The screen is 20\" wide.");
    assertMatchCount(bracketsRule, tool, 0, "This is a [test] sentence...");
    assertMatchCount(bracketsRule, tool, 0,
        "The plight of Tamil refugees caused a surge of support from most of the Tamil political parties.[90]");
    assertMatchCount(bracketsRule, tool, 0,
        "This is what he said: \"We believe in freedom. This is what we do.\"");
    assertMatchCount(bracketsRule, tool, 0, "(([20] [20] [20]))");
    // test for a case that created a false alarm after disambiguation
    assertMatchCount(bracketsRule, tool, 0, "This is a \"special test\", right?");
    // numerical bullets
    assertMatchCount(bracketsRule, tool, 0, "We discussed this in Chapter 1).");
    assertMatchCount(bracketsRule, tool, 0,
        "The jury recommended that: (1) Four additional deputies be employed.");
    assertMatchCount(bracketsRule, tool, 0, "We discussed this in section 1a).");
    assertMatchCount(bracketsRule, tool, 0, "We discussed this in section iv).");

    //inches exception shouldn't match " here:
    assertMatchCount(bracketsRule, tool, 0,
        "In addition, the government would pay a $1,000 \"cost of education\" grant to the schools.");

    assertMatchCount(bracketsRule, tool, 0,
        "Paradise lost to the alleged water needs of Texas' big cities Thursday.");

    assertMatchCount(bracketsRule, tool, 0, "Kill 'em all!");

    assertMatchCount(bracketsRule, tool, 0, "Puttin' on the Ritz");

    // incorrect sentences:
    assertMatchCount(bracketsRule, tool, 1, "(This is a test sentence.");

    //tests for Edward's bug
    assertMatchCount(bracketsRule, tool, 1, "This is a test with an apostrophe &'.");
    assertMatchCount(bracketsRule, tool, 1, "&'");
    assertMatchCount(bracketsRule, tool, 1, "!'");
    assertMatchCount(bracketsRule, tool, 1, "What?'");
    //
    assertMatchCount(bracketsRule, tool, 2, "(This is a test” sentence.");
    assertMatchCount(bracketsRule, tool, 1, "This is a {test sentence.");
    assertMatchCount(bracketsRule, tool, 3, "This [is (a test} sentence.");
  }

  /**
   * Asserts how many matches {@code rule} reports for {@code sentence}.
   *
   * @param expected the expected number of matches
   */
  private static void assertMatchCount(EnglishUnpairedBracketsRule rule, JLanguageTool tool,
      int expected, String sentence) throws IOException {
    assertEquals(expected, rule.match(tool.getAnalyzedSentence(sentence)).length);
  }

public void testMultipleSentences() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.ENGLISH); + tool.enableRule("EN_UNPAIRED_BRACKETS"); + + List<RuleMatch> matches; + matches = tool + .check("This is multiple sentence text that contains a bracket:" + + "[This is bracket. With some text.] and this continues.\n"); + assertEquals(0, matches.size()); + matches = tool + .check("This is multiple sentence text that contains a bracket:" + + "[This is bracket. With some text. And this continues.\n\n"); + assertEquals(1, matches.size()); + // now with a paragraph end inside - we get two alarms because of paragraph + // resetting + matches = tool + .check("This is multiple sentence text that contains a bracket. " + + "(This is bracket. \n\n With some text.)
and this continues."); + assertEquals(2, matches.size()); + } + + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/es/ElwithFemRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/es/ElwithFemRuleTest.java new file mode 100644 index 0000000..7163ed2 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/es/ElwithFemRuleTest.java @@ -0,0 +1,74 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.es; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * @author Susana Sotelo Docio + * + * based on English tests + */ +public class ElwithFemRuleTest extends TestCase { + + public void testRule() throws IOException { + ElwithFemRule rule = new ElwithFemRule(null); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.SPANISH); + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("El alma inmortal.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Tomaré un agua.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Usa mejor el hacha.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Lo escondí bajo el haya.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("La foto del \"aura\" se la debo a él.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Encontraron un ánfora ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Ningún acta ...")); + assertEquals(0, matches.length); + // errors: + matches = rule.match(langTool.getAnalyzedSentence("La alma inmortal.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Tomaré una agua.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Usa mejor la hacha.")); + assertEquals(1, matches.length); + matches = 
rule.match(langTool.getAnalyzedSentence("Lo escondí bajo la haya.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("La foto de la \"aura\" se la debo a él.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Ninguna acta ...")); + assertEquals(1, matches.length); + // With uppercase letters: + matches = rule.match(langTool.getAnalyzedSentence("En La Haya se vive muy bien.")); + assertEquals(0, matches.length); + // With accented chars + //matches = rule.match(langTool.getAnalyzedSentence("Encontraron una ánfora ...")); + //assertEquals(1, matches.length); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRuleTest.java new file mode 100644 index 0000000..159e4d0 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRuleTest.java @@ -0,0 +1,75 @@ + /* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.fr; +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + + +/** + * @author Marcin Miłkowski + */ + +public class QuestionWhitespaceRuleTest extends TestCase { + + + + public final void testRule() throws IOException { + QuestionWhitespaceRule rule = new QuestionWhitespaceRule(TestTools.getEnglishMessages()); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.FRENCH); + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("C'est vrai !")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Qu'est ce que c'est ?")).length); + assertEquals(0, rule.match(langTool.getAnalyzedSentence("L'enjeu de ce livre est donc triple : philosophique")).length); + + // errors: + matches = rule.match(langTool.getAnalyzedSentence("C'est vrai!")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("C'est vrai !")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Qu'est ce que c'est ?")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Qu'est ce que c'est?")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("L'enjeu de ce livre est donc triple: philosophique;")); + assertEquals(2, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("L'enjeu de ce livre est donc triple: philosophique ;")); + assertEquals(2, matches.length); + // check match positions: + assertEquals(2, 
matches.length); + assertEquals(29, matches[0].getFromPos()); + assertEquals(36, matches[0].getToPos()); + assertEquals(50, matches[1].getFromPos()); + assertEquals(52, matches[1].getToPos()); + //guillemets + matches = rule.match(langTool.getAnalyzedSentence("Le guillemet ouvrant est suivi d'un espace insécable : « mais le lieu [...] et le guillemet fermant est précédé d'un espace insécable : [...] littérature ».")); + assertEquals(2, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Le guillemet ouvrant est suivi d'un espace insécable : «mais le lieu [...] et le guillemet fermant est précédé d'un espace insécable : [...] littérature».")); + assertEquals(2, matches.length); + } + + } + diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/AvsAnRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/AvsAnRuleTest.java new file mode 100644 index 0000000..f9cfee1 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/AvsAnRuleTest.java @@ -0,0 +1,167 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.en; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * @author Daniel Naber + */ +public class AvsAnRuleTest extends TestCase { + + public void testRule() throws IOException { + AvsAnRule rule = new AvsAnRule(null); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("This is a test sentence.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was an hour ago.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A university is ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A one-way street ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An hour's work ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Going to an \"industry party\".")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An 8-year old boy ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An 18-year old boy ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("The A-levels are ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An NOP check ...")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A USA-wide license ...")); + 
assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("...asked a UN member.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("In an un-united Germany...")); + assertEquals(0, matches.length); + + //fixed false alarms: + matches = rule.match(langTool.getAnalyzedSentence("Here, a and b are supplementary angles.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("The Qur'an was translated into Polish.")); + assertEquals(0, matches.length); + + // errors: + matches = rule.match(langTool.getAnalyzedSentence("It was a hour ago.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was an sentence that's long.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was a uninteresting talk.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("An university")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A unintersting ...")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("It was a uninteresting talk with an long sentence.")); + assertEquals(2, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A hour's work ...")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Going to a \"industry party\".")); + assertEquals(1, matches.length); + // With uppercase letters: + matches = rule.match(langTool.getAnalyzedSentence("A University")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A Europe wide something")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("then an University sdoj fixme sdoopsd")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A 8-year old boy ...")); + assertEquals(1, 
matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A 18-year old boy ...")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("...asked an UN member.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("In a un-united Germany...")); + assertEquals(1, matches.length); + + //Test on acronyms/initials: + matches = rule.match(langTool.getAnalyzedSentence("A. R.J. Turgot")); + assertEquals(0, matches.length); + + //mixed case as dictionary-based exception + matches = rule.match(langTool.getAnalyzedSentence("Anyone for an MSc?")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Anyone for a MSc?")); + assertEquals(1, matches.length); + //mixed case from general case + matches = rule.match(langTool.getAnalyzedSentence("Anyone for an XMR-based writer?")); + assertEquals(0, matches.length); + + //Test on apostrophes + matches = rule.match(langTool.getAnalyzedSentence("Its name in English is a[1] (), plural A's, As, as, or a's.")); + assertEquals(0, matches.length); + } + + public void testSuggestions() throws IOException { + AvsAnRule rule = new AvsAnRule(null); + assertEquals("a string", rule.suggestAorAn("string")); + assertEquals("a university", rule.suggestAorAn("university")); + assertEquals("an hour", rule.suggestAorAn("hour")); + assertEquals("an all-terrain", rule.suggestAorAn("all-terrain")); + assertEquals("a UNESCO", rule.suggestAorAn("UNESCO")); + + } + + public void testPositions() throws IOException { + AvsAnRule rule = new AvsAnRule(null); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + // no quotes etc.: + matches = rule.match(langTool.getAnalyzedSentence("a industry standard.")); + assertEquals(0, matches[0].getFromPos()); + assertEquals(1, matches[0].getToPos()); + + // quotes.. 
+ matches = rule.match(langTool.getAnalyzedSentence("a \"industry standard\".")); + assertEquals(0, matches[0].getFromPos()); + assertEquals(1, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("a - industry standard\".")); + assertEquals(0, matches[0].getFromPos()); + assertEquals(1, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("This is a \"industry standard\".")); + assertEquals(8, matches[0].getFromPos()); + assertEquals(9, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("\"a industry standard\".")); + assertEquals(1, matches[0].getFromPos()); + assertEquals(2, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("\"Many say this is a industry standard\".")); + assertEquals(18, matches[0].getFromPos()); + assertEquals(19, matches[0].getToPos()); + + matches = rule.match(langTool.getAnalyzedSentence("Like many \"an desperado\" before him, Bart headed south into Mexico.")); + assertEquals(11, matches[0].getFromPos()); + assertEquals(13, matches[0].getToPos()); + + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/CompoundRuleTest.java new file mode 100644 index 0000000..0505a05 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/CompoundRuleTest.java @@ -0,0 +1,46 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.en; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.CompoundRuleTestAbs;
+
+/**
+ * Tests for the English {@link CompoundRule}, built on the shared
+ * {@link CompoundRuleTestAbs} fixture.
+ *
+ * @author Daniel Naber
+ */
+public class CompoundRuleTest extends CompoundRuleTestAbs {
+
+  /**
+   * Sets the inherited {@code langTool} and {@code rule} fields to an English
+   * tool and a fresh {@link CompoundRule} before each test.
+   */
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    langTool = new JLanguageTool(Language.ENGLISH);
+    rule = new CompoundRule(null);
+  }
+
+  /**
+   * Runs the inherited {@code check} helper on hyphenated and unhyphenated input.
+   * NOTE(review): {@code check} presumably takes the expected match count, the
+   * input text and optionally the expected suggestions - confirm against
+   * CompoundRuleTestAbs.
+   */
+  public void testRule() throws IOException {
+    // correct sentences:
+    check(0, "The software supports case-sensitive search.");
+    check(0, "He is one-year-old.");
+    // incorrect sentences:
+    check(1, "case sensitive", new String[]{"case-sensitive"});
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/EnglishUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/EnglishUnpairedBracketsRuleTest.java new file mode 100644 index 0000000..c245b80 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/nb/EnglishUnpairedBracketsRuleTest.java @@ -0,0 +1,151 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) 
any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.en; + +import java.io.IOException; +import java.util.List; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; +import junit.framework.TestCase; + +public class EnglishUnpairedBracketsRuleTest extends TestCase { + + public void testRule() throws IOException { + EnglishUnpairedBracketsRule rule = new EnglishUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.ENGLISH); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("(This is a test sentence).")); + assertEquals(0, matches.length); + matches = rule + .match(langTool.getAnalyzedSentence("This is a word 'test'.")); + assertEquals(0, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("This is the joint presidents' declaration.")); + assertEquals(0, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("The screen is 20\" wide.")); + assertEquals(0, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("This is a [test] sentence...")); + assertEquals(0, matches.length); + matches = rule + .match(langTool + .getAnalyzedSentence("The plight of Tamil refugees caused a surge of support from most of the Tamil political parties.[90]")); + assertEquals(0, 
matches.length); + matches = rule + .match(langTool + .getAnalyzedSentence("This is what he said: \"We believe in freedom. This is what we do.\"")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("(([20] [20] [20]))")); + assertEquals(0, matches.length); + // test for a case that created a false alarm after disambiguation + matches = rule.match(langTool + .getAnalyzedSentence("This is a \"special test\", right?")); + assertEquals(0, matches.length); + // numerical bullets + matches = rule.match(langTool + .getAnalyzedSentence("We discussed this in Chapter 1).")); + assertEquals(0, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("The jury recommended that: (1) Four additional deputies be employed.")); + assertEquals(0, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("We discussed this in section 1a).")); + assertEquals(0, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("We discussed this in section iv).")); + assertEquals(0, matches.length); + + //inches exception shouldn't match " here: + matches = rule.match(langTool + .getAnalyzedSentence("In addition, the government would pay a $1,000 \"cost of education\" grant to the schools.")); + assertEquals(0, matches.length); + + matches = rule.match(langTool + .getAnalyzedSentence("Paradise lost to the alleged water needs of Texas' big cities Thursday.")); + assertEquals(0, matches.length); + + matches = rule.match(langTool + .getAnalyzedSentence("Kill 'em all!")); + assertEquals(0, matches.length); + + matches = rule.match(langTool + .getAnalyzedSentence("Puttin' on the Ritz")); + assertEquals(0, matches.length); + + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("(This is a test sentence.")); + assertEquals(1, matches.length); + + //tests for Edward's bug + matches = rule.match(langTool + .getAnalyzedSentence("This is a test with an apostrophe &'.")); + assertEquals(1, 
matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("&'")); + assertEquals(1, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("!'")); + assertEquals(1, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("What?'")); + assertEquals(1, matches.length); + // + matches = rule.match(langTool + .getAnalyzedSentence("(This is a test” sentence.")); + assertEquals(2, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("This is a {test sentence.")); + assertEquals(1, matches.length); + matches = rule.match(langTool + .getAnalyzedSentence("This [is (a test} sentence.")); + assertEquals(3, matches.length); + } + + public void testMultipleSentences() throws IOException { + final JLanguageTool tool = new JLanguageTool(Language.ENGLISH); + tool.enableRule("EN_UNPAIRED_BRACKETS"); + + List<RuleMatch> matches; + matches = tool + .check("This is multiple sentence text that contains a bracket:" + + "[This is bracket. With some text.] and this continues.\n"); + assertEquals(0, matches.size()); + matches = tool + .check("This is multiple sentence text that contains a bracket:" + + "[This is bracket. With some text. And this continues.\n\n"); + assertEquals(1, matches.size()); + // now with a paragraph end inside - we get two alarms because of paragraph + // resetting + matches = tool + .check("This is multiple sentence text that contains a bracket. " + + "(This is bracket. \n\n With some text.) 
and this continues."); + assertEquals(2, matches.size()); + } + + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/ElementTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/ElementTest.java new file mode 100644 index 0000000..8410cff --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/ElementTest.java @@ -0,0 +1,43 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.patterns; + +import junit.framework.TestCase; + +public class ElementTest extends TestCase { + + + public void testSentStart() { + Element elem = new Element("", false, false, false); + elem.setPosElement("SENT_START", false, false); + assertTrue(elem.isSentStart()); + elem.setPosElement("SENT_START", false, true); + assertFalse(elem.isSentStart()); + elem.setPosElement("SENT_START", true, false); + assertTrue(elem.isSentStart()); + elem.setPosElement("SENT_START", true, true); + assertFalse(elem.isSentStart()); + + //this should be false: + elem = new Element("bla|blah", false, true, false); + elem.setPosElement("foo", true, true); + assertFalse(elem.isSentStart()); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleTest.java new file mode 100644 index 0000000..14d73ac --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleTest.java @@ -0,0 +1,87 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.patterns; + +import java.io.IOException; +import java.util.List; + +import javax.xml.parsers.ParserConfigurationException; + +import junit.framework.TestCase; + +import org.xml.sax.SAXException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Tests the default false-friend rules for several language pairs.
+ * NOTE(review): the two-argument JLanguageTool constructor presumably takes
+ * the text language first and the reader's mother tongue second - confirm
+ * against JLanguageTool.
+ *
+ * @author Daniel Naber
+ */
+public class FalseFriendRuleTest extends TestCase {
+
+  /** Tag whose occurrences in a match message are counted as suggestions. */
+  private static final String SUGGESTION_TAG = "<suggestion>";
+
+  /** English text checked with German as the second language. */
+  public void testHintsForGermanSpeakers() throws IOException, ParserConfigurationException, SAXException {
+    JLanguageTool langTool = new JLanguageTool(Language.ENGLISH, Language.GERMAN);
+    langTool.activateDefaultFalseFriendRules();
+    assertErrors(1, "We will berate you.", langTool);
+    assertErrors(0, "We will give you advice.", langTool);
+    assertErrors(1, "I go to high school in Foocity.", langTool);
+  }
+
+  /** German text checked with English as the second language. */
+  public void testHintsForEnglishSpeakers() throws IOException, ParserConfigurationException, SAXException {
+    JLanguageTool langTool = new JLanguageTool(Language.GERMAN, Language.ENGLISH);
+    langTool.activateDefaultFalseFriendRules();
+    assertErrors(1, "Man sollte ihn nicht so beraten.", langTool);
+    assertErrors(0, "Man sollte ihn nicht so beschimpfen.", langTool);
+    assertErrors(1, "Ich gehe in Blubbstadt zur Hochschule.", langTool);
+  }
+
+  /** English text checked with Polish as the second language, including suggestion counts. */
+  public void testHintsForPolishSpeakers() throws IOException, ParserConfigurationException, SAXException {
+    JLanguageTool langTool = new JLanguageTool(Language.ENGLISH, Language.POLISH);
+    langTool.activateDefaultFalseFriendRules();
+    assertErrors(1, "This is an absurd.", langTool);
+    assertErrors(0, "This is absurdity.", langTool);
+    assertSuggestions(0, "This is absurdity.", langTool);
+    assertErrors(1, "I have to speak to my advocate.", langTool);
+    assertSuggestions(3, "My brother is politic.", langTool);
+  }
+
+  /**
+   * Checks the text and asserts the total number of rule matches.
+   *
+   * @param errorCount expected number of matches
+   * @param s text to check
+   * @param langTool configured tool used for the check
+   */
+  private void assertErrors(int errorCount, String s, JLanguageTool langTool) throws IOException {
+    List<RuleMatch> matches = langTool.check(s);
+    //System.err.println(matches);
+    assertEquals(errorCount, matches.size());
+  }
+
+  /**
+   * Checks the text and asserts how many {@code <suggestion>} tags occur in
+   * the messages of all matches taken together.
+   *
+   * @param suggestionCount expected total number of suggestion tags
+   * @param s text to check
+   * @param langTool configured tool used for the check
+   */
+  private void assertSuggestions(final int suggestionCount, final String s, final JLanguageTool langTool) throws IOException {
+    final List<RuleMatch> matches = langTool.check(s);
+    int suggFound = 0;
+    for (final RuleMatch match : matches) {
+      final String message = match.getMessage();
+      // Search from index 0 and count only real hits, so the total stays
+      // exact for any number of matches and for a tag at the very start.
+      int pos = message.indexOf(SUGGESTION_TAG);
+      while (pos != -1) {
+        suggFound++;
+        pos = message.indexOf(SUGGESTION_TAG, pos + 1);
+      }
+    }
+    assertEquals(suggestionCount, suggFound);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java new file mode 100644 index 0000000..a1dfeaa --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java @@ -0,0 +1,502 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.patterns; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedSentence; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.IncorrectExample; +import de.danielnaber.languagetool.rules.Rule; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * @author Daniel Naber + */ +public class PatternRuleTest extends TestCase { + + private static JLanguageTool langTool; + + private static final Pattern PROBABLE_REGEX = Pattern.compile("[^\\[\\]\\*\\+\\|\\^\\{\\}\\?][\\[\\]\\*\\+\\|\\^\\{\\}\\?]|\\\\[^0-9]|\\(.+\\)|\\.."); + + private static final Pattern CASE_REGEX = Pattern.compile("\\[(.)(.)\\]"); + + + @Override + public void setUp() throws IOException { + if (langTool == null) { + langTool = new JLanguageTool(Language.ENGLISH); + } + } + + public void testGrammarRulesFromXML() throws IOException { + testGrammarRulesFromXML(null, false); + } + + private void testGrammarRulesFromXML(final Set<Language> ignoredLanguages, + final boolean verbose) throws IOException { + for (final Language lang : Language.LANGUAGES) { + if (ignoredLanguages != null && ignoredLanguages.contains(lang)) { + if (verbose) { + System.out.println("Ignoring tests for " + lang.getName()); + } + continue; + } + if (verbose) { + System.out.println("Running tests 
for " + lang.getName() + "..."); + } + final PatternRuleLoader ruleLoader = new PatternRuleLoader(); + final JLanguageTool languageTool = new JLanguageTool(lang); + final String name = "/" + lang.getShortName() + "/grammar.xml"; + final List<PatternRule> rules = ruleLoader.getRules(JLanguageTool.getDataBroker(). + getFromRulesDirAsStream(name), name); + warnIfRegexpSyntax(rules, lang); + testGrammarRulesFromXML(rules, languageTool, lang); + } + } + + // TODO: probably this would be more useful for exceptions + // instead of adding next methods to PatternRule + // we can probably validate using XSD and specify regexes straight there + private void warnIfRegexpSyntax(final List<PatternRule> rules, + final Language lang) { + for (final PatternRule rule : rules) { + int i = 0; + for (final Element element : rule.getElements()) { + i++; + warnIfElementNotKosher(element, lang, rule.getId()); + if (element.getExceptionList() != null) { + for (final Element exception: element.getExceptionList()) { + warnIfElementNotKosher(exception, lang, rule.getId() + + " (exception in token [" + i + "]:" + element +") "); + } + } + } + } + } + + private void warnIfElementNotKosher(final Element element, + final Language lang, final String ruleId) { + if (!element.isRegularExpression() + && (PROBABLE_REGEX.matcher(element.getString()) + .find())) { + System.err.println("The " + lang.toString() + " rule: " + + ruleId + " contains element " + "\"" + element + + "\" that is not marked as regular expression" + + " but probably is one."); + } + if (element.isRegularExpression() && "".equals(element.getString())) { + System.err.println("The " + lang.toString() + " rule: " + + ruleId + " contains an empty string element " + "\"" + element + + "\" that is marked as regular expression (don't look at the POS tag, it might be OK)."); + } else if (element.isRegularExpression() + && !PROBABLE_REGEX.matcher(element.getString()) + .find()) { + System.err.println("The " + lang.toString() + " rule: " + + 
ruleId + " contains element " + "\"" + element + + "\" that is marked as regular expression" + + " but probably is not one."); + } + + if (element.isInflected() + && "".equals(element.getString())) { + System.err.println("The " + lang.toString() + " rule: " + + ruleId + " contains element " + "\"" + element + + "\" that is marked as inflected" + + " but is empty, so the attribute is redundant."); + } + + if (element.isRegularExpression() && !element.getCaseSensitive()) { + Matcher matcher = CASE_REGEX.matcher(element.getString()); + if (matcher.find()) { + final String letter1 = matcher.group(1); + final String letter2 = matcher.group(2); + + if (!letter1.equals(letter2) + && letter1.toLowerCase().equals(letter2.toLowerCase())) { + System.err.println("The " + lang.toString() + " rule: " + + ruleId + " contains regexp part [" + letter1 + letter2 + + "] which is useless without case_sensitive=\"yes\"."); + } + } + } + + if (element.isRegularExpression() && element.getString().contains("|")) { + final String[] groups = element.getString().split("\\)"); + final boolean caseSensitive = element.getCaseSensitive(); + for (final String group : groups) { + final String[] alt = group.split("\\|"); + final Set<String> partSet = new HashSet<String>(); + final Set<String> partSetNoCase = new HashSet<String>(); + for (String part : alt) { + String partNoCase = caseSensitive ? part : part.toLowerCase(); + if (partSetNoCase.contains(partNoCase)) { + if (partSet.contains(part)) { + // Duplicate disjunction parts "foo|foo". + System.err.println("The " + lang.toString() + " rule : " + + ruleId + " contains duplicated disjunction part (" + + part + ") within the element " + "\"" + element + "\"."); + } else { + // Duplicate disjunction parts "Foo|foo" since element ignores case. + System.err.println("The " + lang.toString() + " rule : " + + ruleId + " contains duplicated non case sensitive disjunction part (" + + part + ") within the element " + "\"" + element + "\". 
Did you " + + "forget case_sensitive=\"yes\"?"); + } + } + partSetNoCase.add(partNoCase); + partSet.add(part); + } + } + } + } + + + private void testGrammarRulesFromXML(final List<PatternRule> rules, + final JLanguageTool languageTool, final Language lang) throws IOException { + int noSuggestionCount = 0; + final HashMap<String, PatternRule> complexRules = new HashMap<String, PatternRule>(); + for (final PatternRule rule : rules) { + final List<String> goodSentences = rule.getCorrectExamples(); + for (String goodSentence : goodSentences) { + // enable indentation use + goodSentence = goodSentence.replaceAll("[\\n\\t]+", ""); + goodSentence = cleanXML(goodSentence); + assertTrue(goodSentence.trim().length() > 0); + assertFalse(lang + ": Did not expect error in: " + goodSentence + + " (Rule: " + rule + ")", match(rule, goodSentence, languageTool)); + } + final List<IncorrectExample> badSentences = rule.getIncorrectExamples(); + for (IncorrectExample origBadExample : badSentences) { + // enable indentation use + String origBadSentence = origBadExample.getExample().replaceAll( + "[\\n\\t]+", ""); + final List<String> suggestedCorrection = origBadExample + .getCorrections(); + final int expectedMatchStart = origBadSentence.indexOf("<marker>"); + final int expectedMatchEnd = origBadSentence.indexOf("</marker>") + - "<marker>".length(); + if (expectedMatchStart == -1 || expectedMatchEnd == -1) { + fail(lang + + ": No error position markup ('<marker>...</marker>') in bad example in rule " + + rule); + } + final String badSentence = cleanXML(origBadSentence); + assertTrue(badSentence.trim().length() > 0); + RuleMatch[] matches = getMatches(rule, badSentence, languageTool); + if (!rule.isWithComplexPhrase()) { + assertTrue(lang + ": Did expect one error in: \"" + badSentence + + "\" (Rule: " + rule + "), got " + matches.length + + ". 
Additional info:" + rule.getMessage(), matches.length == 1); + assertEquals(lang + + ": Incorrect match position markup (start) for rule " + rule, + expectedMatchStart, matches[0].getFromPos()); + assertEquals(lang + + ": Incorrect match position markup (end) for rule " + rule, + expectedMatchEnd, matches[0].getToPos()); + // make sure suggestion is what we expect it to be + if (suggestedCorrection != null && suggestedCorrection.size() > 0) { + assertTrue("You specified a correction but your message has no suggestions in rule " + rule, + rule.getMessage().contains("<suggestion>") + ); + assertTrue(lang + ": Incorrect suggestions: " + + suggestedCorrection.toString() + " != " + + matches[0].getSuggestedReplacements() + " for rule " + rule, + suggestedCorrection.equals(matches[0] + .getSuggestedReplacements())); + } + // make sure the suggested correction doesn't produce an error: + if (matches[0].getSuggestedReplacements().size() > 0) { + final int fromPos = matches[0].getFromPos(); + final int toPos = matches[0].getToPos(); + for (final String repl : matches[0].getSuggestedReplacements()) { + final String fixedSentence = badSentence.substring(0, fromPos) + + repl + badSentence.substring(toPos); + matches = getMatches(rule, fixedSentence, languageTool); + if (matches.length > 0) { + fail("Incorrect input:\n" + + " " + badSentence + + "\nCorrected sentence:\n" + + " " + fixedSentence + + "\nBy Rule:\n" + + " " + rule + + "\nThe correction triggered an error itself:\n" + + " " + matches[0] + "\n"); + } + } + } else { + noSuggestionCount++; + } + } else { // for multiple rules created with complex phrases + + matches = getMatches(rule, badSentence, languageTool); + if (matches.length == 0 + && !complexRules.containsKey(rule.getId() + badSentence)) { + complexRules.put(rule.getId() + badSentence, rule); + } + + if (matches.length != 0) { + complexRules.put(rule.getId() + badSentence, null); + assertTrue(lang + ": Did expect one error in: \"" + badSentence + + "\" (Rule: 
" + rule + "), got " + matches.length, + matches.length == 1); + assertEquals(lang + + ": Incorrect match position markup (start) for rule " + rule, + expectedMatchStart, matches[0].getFromPos()); + assertEquals(lang + + ": Incorrect match position markup (end) for rule " + rule, + expectedMatchEnd, matches[0].getToPos()); + // make sure suggestion is what we expect it to be + if (suggestedCorrection != null && suggestedCorrection.size() > 0) { + assertTrue( + lang + ": Incorrect suggestions: " + + suggestedCorrection.toString() + " != " + + matches[0].getSuggestedReplacements() + " for rule " + + rule, suggestedCorrection.equals(matches[0] + .getSuggestedReplacements())); + } + // make sure the suggested correction doesn't produce an error: + if (matches[0].getSuggestedReplacements().size() > 0) { + final int fromPos = matches[0].getFromPos(); + final int toPos = matches[0].getToPos(); + for (final String repl : matches[0].getSuggestedReplacements()) { + final String fixedSentence = badSentence.substring(0, fromPos) + + repl + badSentence.substring(toPos); + matches = getMatches(rule, fixedSentence, languageTool); + assertEquals("Corrected sentence for rule " + rule + + " triggered error: " + fixedSentence, 0, matches.length); + } + } else { + noSuggestionCount++; + } + } + } + + } + } + if (!complexRules.isEmpty()) { + final Set<String> set = complexRules.keySet(); + final List<PatternRule> badRules = new ArrayList<PatternRule>(); + final Iterator<String> iter = set.iterator(); + while (iter.hasNext()) { + final PatternRule badRule = complexRules.get(iter.next()); + if (badRule != null) { + badRule.notComplexPhrase(); + badRule + .setMessage("The rule contains a phrase that never matched any incorrect example."); + badRules.add(badRule); + } + } + if (!badRules.isEmpty()) { + testGrammarRulesFromXML(badRules, languageTool, lang); + } + } + } + + protected String cleanXML(final String str) { + return str.replaceAll("<([^<].*?)>", ""); + } + + private boolean 
match(final Rule rule, final String sentence, + final JLanguageTool languageTool) throws IOException { + final AnalyzedSentence text = languageTool.getAnalyzedSentence(sentence); + final RuleMatch[] matches = rule.match(text); + return matches.length > 0; + } + + private RuleMatch[] getMatches(final Rule rule, final String sentence, + final JLanguageTool languageTool) throws IOException { + final AnalyzedSentence text = languageTool.getAnalyzedSentence(sentence); + final RuleMatch[] matches = rule.match(text); + /* + * for (int i = 0; i < matches.length; i++) { + * System.err.println(matches[i]); } + */ + return matches; + } + + public void testUppercasingSuggestion() throws IOException { + final JLanguageTool langTool = new JLanguageTool(Language.ENGLISH); + langTool.activateDefaultPatternRules(); + final List<RuleMatch> matches = langTool + .check("Were are in the process of ..."); + assertEquals(1, matches.size()); + final RuleMatch match = matches.get(0); + final List<String> sugg = match.getSuggestedReplacements(); + assertEquals(2, sugg.size()); + assertEquals("Where", sugg.get(0)); + assertEquals("We", sugg.get(1)); + } + + public void testRule() throws IOException { + PatternRule pr; + RuleMatch[] matches; + + pr = makePatternRule("one"); + matches = pr + .match(langTool.getAnalyzedSentence("A non-matching sentence.")); + assertEquals(0, matches.length); + matches = pr.match(langTool + .getAnalyzedSentence("A matching sentence with one match.")); + assertEquals(1, matches.length); + assertEquals(25, matches[0].getFromPos()); + assertEquals(28, matches[0].getToPos()); + // these two are not set if the rule is called standalone (not via + // JLanguageTool): + assertEquals(-1, matches[0].getColumn()); + assertEquals(-1, matches[0].getLine()); + assertEquals("ID1", matches[0].getRule().getId()); + assertTrue(matches[0].getMessage().equals("user visible message")); + assertTrue(matches[0].getShortMessage().equals("short comment")); + matches = pr.match(langTool 
+ .getAnalyzedSentence("one one and one: three matches")); + assertEquals(3, matches.length); + + pr = makePatternRule("one two"); + matches = pr.match(langTool.getAnalyzedSentence("this is one not two")); + assertEquals(0, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("this is two one")); + assertEquals(0, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("this is one two three")); + assertEquals(1, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("one two")); + assertEquals(1, matches.length); + + pr = makePatternRule("one|foo|xxxx two", false, true); + matches = pr.match(langTool.getAnalyzedSentence("one foo three")); + assertEquals(0, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("one two")); + assertEquals(1, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("foo two")); + assertEquals(1, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("one foo two")); + assertEquals(1, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("y x z one two blah foo")); + assertEquals(1, matches.length); + + pr = makePatternRule("one|foo|xxxx two|yyy", false, true); + matches = pr.match(langTool.getAnalyzedSentence("one, yyy")); + assertEquals(0, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("one yyy")); + assertEquals(1, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("xxxx two")); + assertEquals(1, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("xxxx yyy")); + assertEquals(1, matches.length); + } + + private PatternRule makePatternRule(final String s) { + return makePatternRule(s, false, false); + } + + private PatternRule makePatternRule(final String s, + final boolean caseSensitive, final boolean regex) { + final List<Element> elems = new ArrayList<Element>(); + final String[] parts = s.split(" "); + boolean pos = false; + Element se = null; + for (final String element : parts) { + if 
(element.equals("SENT_START")) { + pos = true; + } + if (!pos) { + se = new Element(element, caseSensitive, regex, false); + } else { + se = new Element("", caseSensitive, regex, false); + } + if (pos) { + se.setPosElement(element, false, false); + } + elems.add(se); + pos = false; + } + final PatternRule rule = new PatternRule("ID1", Language.ENGLISH, elems, + "test rule", "user visible message", "short comment"); + return rule; + } + + public void testSentenceStart() throws IOException { + PatternRule pr; + RuleMatch[] matches; + + pr = makePatternRule("SENT_START One"); + matches = pr.match(langTool.getAnalyzedSentence("Not One word.")); + assertEquals(0, matches.length); + matches = pr.match(langTool.getAnalyzedSentence("One word.")); + assertEquals(1, matches.length); + } + + private static String callFormatMultipleSynthesis(final String[] suggs, + final String left, final String right) throws IllegalArgumentException, + SecurityException, InvocationTargetException, IllegalAccessException, + NoSuchMethodException { + Class[] argClasses = { String[].class, String.class, String.class }; + Object[] argObjects = { suggs, left, right }; + return TestTools.callStringStaticMethod(PatternRule.class, + "formatMultipleSynthesis", argClasses, argObjects); + } + + /* test private methods as well */ + public void testformatMultipleSynthesis() throws IllegalArgumentException, + SecurityException, InvocationTargetException, IllegalAccessException, + NoSuchMethodException { + final String[] suggArray = { "blah blah", "foo bar" }; + + assertEquals( + "This is how you should write: <suggestion>blah blah</suggestion>, <suggestion>foo bar</suggestion>.", + + callFormatMultipleSynthesis(suggArray, + "This is how you should write: <suggestion>", "</suggestion>.")); + + final String[] suggArray2 = { "test", " " }; + + assertEquals( + "This is how you should write: <suggestion>test</suggestion>, <suggestion> </suggestion>.", + + callFormatMultipleSynthesis(suggArray2, + "This is how 
you should write: <suggestion>", "</suggestion>.")); + } + + /** + * Test XML patterns, as a help for people developing rules that are not + * programmers. + */ + public static void main(final String[] args) throws IOException { + final PatternRuleTest prt = new PatternRuleTest(); + System.out.println("Running XML pattern tests..."); + prt.setUp(); + final Set<Language> ignoredLanguages = new HashSet<Language>(); + // ignoredLanguages.add(Language.CZECH); // has no XML rules yet + prt.testGrammarRulesFromXML(ignoredLanguages, true); + System.out.println("Tests successful."); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/TestUnifier.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/TestUnifier.java new file mode 100644 index 0000000..ec05e25 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/TestUnifier.java @@ -0,0 +1,283 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.patterns; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.List; +import java.util.ArrayList; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; + +public class TestUnifier extends TestCase { + + // trivial unification = test if the character case is the same + public void testUnificationCase() { + Unifier uni = new Unifier(); + Element elLower = new Element("\\p{Ll}+", true, true, false); + Element elUpper = new Element("\\p{Lu}\\p{Ll}+", true, true, false); + Element elAllUpper = new Element("\\p{Lu}+$", true, true, false); + uni.setEquivalence("case-sensitivity", "lowercase", elLower); + uni.setEquivalence("case-sensitivity", "uppercase", elUpper); + uni.setEquivalence("case-sensitivity", "alluppercase", elAllUpper); + AnalyzedToken lower1 = new AnalyzedToken("lower", "JJR", "lower"); + AnalyzedToken lower2 = new AnalyzedToken("lowercase", "JJ", "lowercase"); + AnalyzedToken upper1 = new AnalyzedToken("Uppercase", "JJ", "Uppercase"); + AnalyzedToken upper2 = new AnalyzedToken("John", "NNP", "John"); + AnalyzedToken upperall1 = new AnalyzedToken("JOHN", "NNP", "John"); + AnalyzedToken upperall2 = new AnalyzedToken("JAMES", "NNP", "James"); + + Map<String, List<String>> equiv = new HashMap<String, List<String>>(); + List<String> list1 = new ArrayList<String>(); + list1.add("lowercase"); + equiv.put("case-sensitivity", list1); + boolean satisfied = uni.isSatisfied(lower1, equiv); + satisfied &= uni.isSatisfied(lower2, equiv); + uni.startUnify(); + assertEquals(true, satisfied); + uni.reset(); + satisfied = uni.isSatisfied(upper2, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(lower2, equiv); + 
assertEquals(false, satisfied); + uni.reset(); + satisfied = uni.isSatisfied(upper1, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(lower1, equiv); + assertEquals(false, satisfied); + uni.reset(); + satisfied = uni.isSatisfied(upper2, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(upper1, equiv); + assertEquals(false, satisfied); + uni.reset(); + equiv.clear(); + list1.clear(); + list1.add("uppercase"); + equiv.put("case-sensitivity", list1); + satisfied = uni.isSatisfied(upper2, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(upper1, equiv); + assertEquals(true, satisfied); + uni.reset(); + equiv.clear(); + list1.clear(); + list1.add("alluppercase"); + equiv.put("case-sensitivity", list1); + satisfied = uni.isSatisfied(upper2, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(upper1, equiv); + assertEquals(false, satisfied); + uni.reset(); + satisfied = uni.isSatisfied(upperall2, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(upperall1, equiv); + assertEquals(true, satisfied); + } + + // slightly non-trivial unification = + // test if the grammatical number is the same + public void testUnificationNumber() { + Unifier uni = new Unifier(); + Element sgElement = new Element("", false, false, false); + sgElement.setPosElement(".*[\\.:]sg:.*", true, false); + uni.setEquivalence("number", "singular", sgElement); + Element plElement = new Element("", false, false, false); + plElement.setPosElement(".*[\\.:]pl:.*", true, false); + uni.setEquivalence("number", "plural", plElement); + + AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah", "mały"); + AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah", "człowiek"); + + Map<String, List<String>> equiv = new HashMap<String, List<String>>(); + List<String> list1 = new ArrayList<String>(); + list1.add("singular"); + equiv.put("number", list1); + + boolean satisfied = uni.isSatisfied(sing1, equiv); + uni.startUnify(); + satisfied &= 
uni.isSatisfied(sing2, equiv); + assertEquals(true, satisfied); + uni.reset(); + + //for multiple readings - OR for interpretations, AND for tokens + AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały"); + satisfied = uni.isSatisfied(sing1, equiv); + satisfied |= uni.isSatisfied(sing1a, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing2, equiv); + assertEquals(true, satisfied); + uni.reset(); + + //check if any of the equivalences is there + list1.add("plural"); + equiv.clear(); + equiv.put("number", list1); + sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały"); + satisfied = uni.isSatisfied(sing1, equiv); + satisfied |= uni.isSatisfied(sing1a, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing2, equiv); + assertEquals(true, satisfied); + uni.reset(); + +//now test all possible feature equivalences by leaving type blank + sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały"); + equiv.clear(); + equiv.put("number", null); + satisfied = uni.isSatisfied(sing1, equiv); + satisfied |= uni.isSatisfied(sing1a, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing2, equiv); + assertEquals(true, satisfied); + uni.reset(); + +//test non-agreeing tokens with blank types + satisfied = uni.isSatisfied(sing1a, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing2, equiv); + assertEquals(false, satisfied); + uni.reset(); + } + +//slightly non-trivial unification = + // test if the grammatical number is the same + public void testUnificationNumberGender() { + Unifier uni = new Unifier(); + Element sgElement = new Element("", false, false, false); + sgElement.setPosElement(".*[\\.:]sg:.*", true, false); + uni.setEquivalence("number", "singular", sgElement); + Element plElement = new Element("", false, false, false); + plElement.setPosElement(".*[\\.:]pl:.*", true, false); + uni.setEquivalence("number", "plural", plElement); + + Element femElement = new Element("", false, false, false); + 
femElement.setPosElement(".*[\\.:]f", true, false); + uni.setEquivalence("gender", "feminine", femElement); + + Element mascElement = new Element("", false, false, false); + mascElement.setPosElement(".*[\\.:]m", true, false); + uni.setEquivalence("gender", "masculine", mascElement); + + AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały"); + AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:sg:blahblah:f", "mały"); + AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:m", "mały"); + AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek"); + + Map<String, List<String>> equiv = new HashMap<String, List<String>>(); + equiv.put("number", null); + equiv.put("gender", null); + + boolean satisfied = uni.isSatisfied(sing1, equiv); + satisfied |= uni.isSatisfied(sing1a, equiv); + satisfied |= uni.isSatisfied(sing1b, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing2, equiv); + uni.startNextToken(); + assertEquals(true, satisfied); + assertEquals("[mały/adj:sg:blahblah:m, człowiek/subst:sg:blahblah:m]", Arrays.toString(uni.getUnifiedTokens())); + uni.reset(); + } + + // checks if all tokens share the same set of + // features to be unified + public void testMultiplefeats() { + Unifier uni = new Unifier(); + Element sgElement = new Element("", false, false, false); + sgElement.setPosElement(".*[\\.:]sg:.*", true, false); + uni.setEquivalence("number", "singular", sgElement); + Element plElement = new Element("", false, false, false); + plElement.setPosElement(".*[\\.:]pl:.*", true, false); + uni.setEquivalence("number", "plural", plElement); + Element femElement = new Element("", false, false, false); + femElement.setPosElement(".*[\\.:]f([\\.:].*)?", true, false); + uni.setEquivalence("gender", "feminine", femElement); + Element mascElement = new Element("", false, false, false); + mascElement.setPosElement(".*[\\.:]m([\\.:].*)?", true, false); + uni.setEquivalence("gender", "masculine", 
mascElement); + Element neutElement = new Element("", false, false, false); + neutElement.setPosElement(".*[\\.:]n([\\.:].*)?", true, false); + uni.setEquivalence("gender", "neutral", neutElement); + + AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały"); + AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały"); + AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały"); + AnalyzedToken sing2 = new AnalyzedToken("zgarbiony", "adj:pl:blahblah:f", "zgarbiony"); + AnalyzedToken sing3 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek"); + + Map<String, List<String>> equiv = new HashMap<String, List<String>>(); + equiv.put("number", null); + equiv.put("gender", null); + + boolean satisfied = uni.isSatisfied(sing1, equiv); + satisfied |= uni.isSatisfied(sing1a, equiv); + satisfied |= uni.isSatisfied(sing1b, equiv); + uni.startUnify(); + satisfied &= uni.isSatisfied(sing2, equiv); + uni.startNextToken(); + satisfied &= uni.isSatisfied(sing3, equiv); + uni.startNextToken(); + assertEquals(false, satisfied); + uni.reset(); + + //now test the simplified interface + satisfied = true; //this must be true to start with... 
+ satisfied &= uni.isUnified(sing1, equiv, false, false); + satisfied &= uni.isUnified(sing1a, equiv, false, false); + satisfied &= uni.isUnified(sing1b, equiv, false, true); + satisfied &= uni.isUnified(sing2, equiv, false, true); + satisfied &= uni.isUnified(sing3, equiv, false, true); + assertEquals(false, satisfied); + uni.reset(); + + sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty"); + sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty"); + sing2 = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło"); + + satisfied = true; + satisfied &= uni.isUnified(sing1a, equiv, false, false); + satisfied &= uni.isUnified(sing1b, equiv, false, true); + satisfied &= uni.isUnified(sing2, equiv, false, true); + assertEquals(true, satisfied); + assertEquals("[osobisty/adj:sg:nom.acc.voc:n:pos:aff, godło/subst:sg:nom.acc.voc:n]", Arrays.toString(uni.getFinalUnified())); + uni.reset(); + + //now test a case when the last reading doesn't match at all + + sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty"); + sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty"); + AnalyzedToken sing2a = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło"); + AnalyzedToken sing2b = new AnalyzedToken("godło", "indecl", "godło"); + + satisfied = true; + satisfied &= uni.isUnified(sing1a, equiv, false, false); + satisfied &= uni.isUnified(sing1b, equiv, false, true); + satisfied &= uni.isUnified(sing2a, equiv, false, false); + satisfied &= uni.isUnified(sing2b, equiv, false, true); + assertEquals(true, satisfied); + assertEquals("[osobisty/adj:sg:nom.acc.voc:n:pos:aff, godło/subst:sg:nom.acc.voc:n]", Arrays.toString(uni.getFinalUnified())); + uni.reset(); + + } + + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/CompoundRuleTest.java 
new file mode 100644 index 0000000..93cc3ea --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/CompoundRuleTest.java @@ -0,0 +1,46 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.pl; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.CompoundRuleTestAbs; + +/** + * @author Daniel Naber + */ +public class CompoundRuleTest extends CompoundRuleTestAbs { + + protected void setUp() throws Exception { + super.setUp(); + langTool = new JLanguageTool(Language.POLISH); + rule = new CompoundRule(null); + } + + public void testRule() throws IOException { + // correct sentences: + check(0, "Nie róbmy nic na łapu-capu."); + check(0, "Jedzmy kogel-mogel."); + // incorrect sentences: + check(1, "bim bom", new String[]{"bim-bom"}); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRuleTest.java new file mode 100644 index 
0000000..52bcd6f --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRuleTest.java @@ -0,0 +1,56 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.pl; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; +import junit.framework.TestCase; + +public class PolishUnpairedBracketsRuleTest extends TestCase { + + public void testRulePolish() throws IOException { + PolishUnpairedBracketsRule rule = new PolishUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.POLISH); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.POLISH); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("(To jest zdanie do testowania).")); + assertEquals(0, matches.length); + // correct sentences: + matches = rule + .match(langTool + .getAnalyzedSentence("Piosenka ta trafiła na wiele list \"Best of...\", włączając w to te, które zostały utworzone przez 
magazyn Rolling Stone.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("A \"B\" C.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("\"A\" B \"C\".")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("W tym zdaniu jest niesparowany „cudzysłów.")); + assertEquals(1, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRuleTest.java new file mode 100644 index 0000000..070f3f2 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRuleTest.java @@ -0,0 +1,51 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.pl; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.RuleMatch; + +public class PolishWordRepeatRuleTest extends TestCase { + + /* + * Test method for 'de.danielnaber.languagetool.rules.pl.PolishWordRepeatRule.match(AnalyzedSentence)' + */ + public void testRule() throws IOException { + final PolishWordRepeatRule rule = new PolishWordRepeatRule(null); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.POLISH); + //correct + matches = rule.match(langTool.getAnalyzedSentence("To jest zdanie próbne.")); + assertEquals(0, matches.length); + //repeated prepositions, don't count'em + matches = rule.match(langTool.getAnalyzedSentence("Na dyskotece tańczył jeszcze, choć był na bani.")); + assertEquals(0, matches.length); + //incorrect + matches = rule.match(langTool.getAnalyzedSentence("Był on bowiem pięknym strzelcem bowiem.")); + assertEquals(1, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("Mówiła długo, żeby tylko mówić długo.")); + assertEquals(2, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/SimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/SimpleReplaceRuleTest.java new file mode 100644 index 0000000..5689a72 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/pl/SimpleReplaceRuleTest.java @@ -0,0 +1,80 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute 
it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.pl; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * + * Simple tests for rules/pl/SimpleReplaceRule class + * + * @author Ionuț Păduraru + */ +public class SimpleReplaceRuleTest extends TestCase { + + private SimpleReplaceRule rule; + private JLanguageTool langTool; + + protected void setUp() throws Exception { + super.setUp(); + rule = new SimpleReplaceRule(TestTools.getMessages("pl")); + langTool = new JLanguageTool(Language.POLISH); + } + + public void testRule() throws IOException { + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Wszystko w porządku.")).length); + + // incorrect sentences: + + // at the beginning of a sentence (Romanian replace rule is case-sensitive) + checkSimpleReplaceRule("Piaty przypadek.", "Piąty"); + // inside sentence + checkSimpleReplaceRule("To piaty przypadek.", "piąty"); + } + + /** + * Check if a specific replace rule applies. + * + * @param sentence + * the sentence containing the incorrect/misspeled word. 
+ * @param word + * the word that is correct (the suggested replacement). + * @throws IOException + */ + private void checkSimpleReplaceRule(String sentence, String word) + throws IOException { + RuleMatch[] matches; + matches = rule.match(langTool.getAnalyzedSentence(sentence)); + assertEquals("Invalid matches.length while checking sentence: " + + sentence, 1, matches.length); + assertEquals("Invalid replacement count wile checking sentence: " + + sentence, 1, matches[0].getSuggestedReplacements().size()); + assertEquals("Invalid suggested replacement while checking sentence: " + + sentence, word, matches[0].getSuggestedReplacements().get(0)); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/CompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/CompoundRuleTest.java new file mode 100644 index 0000000..f6f6200 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/CompoundRuleTest.java @@ -0,0 +1,53 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.ro; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.CompoundRuleTestAbs; + +/** + * Tests for {@link CompoundRule} class. + * + * @author Ionuț Păduraru + */ +public class CompoundRuleTest extends CompoundRuleTestAbs { + + protected void setUp() throws Exception { + super.setUp(); + langTool = new JLanguageTool(Language.ROMANIAN); + rule = new CompoundRule(TestTools.getMessages("ro")); + } + + public void testRule() throws IOException { + // correct sentences: + check(0, "Au plecat câteșitrei."); + // incorrect sentences: + check(1, "câte și trei", new String[] { "câteșitrei" }); + check(1, "Câte și trei", new String[] { "Câteșitrei" }); + check(1, "câte-și-trei", new String[] { "câteșitrei" }); + + check(1, "tus trei", new String[] { "tustrei" }); + check(1, "tus-trei", new String[] { "tustrei" }); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/RomanianPatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/RomanianPatternRuleTest.java new file mode 100644 index 0000000..eab518f --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/RomanianPatternRuleTest.java @@ -0,0 +1,55 @@ +package de.danielnaber.languagetool.rules.ro; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.rules.patterns.PatternRuleTest; + +/** + * + * @author Ionuț Păduraru + * @since 07.05.2009 21:07:12 + * + * This testcase is not for actual rules but for PatternRuleTest to ensure proper + * xml cleaning: there is a romanian rule where "<<" is used; 
we want + * "X<marker><<</marker>Y" to be tranformed into "X<<Y", not into "XY" + * (see rule id GHILIMELE_DUBLE_INTERIOR_INCEPUT in ro/grammar.xml). + * + */ +public class RomanianPatternRuleTest extends TestCase { + + private PatternRuleTestWrapper patternRuleTestWrapper = new PatternRuleTestWrapper(); + + /** + * wrapper on PatternRuleTestWrapper to expose cleanXML method + * + * @author Ionuț Păduraru + * @since 07.05.2009 21:11:01 + */ + private static class PatternRuleTestWrapper extends PatternRuleTest { + @Override + public String cleanXML(String str) { + return super.cleanXML(str); + } + } + + public String cleanXML(String str) { + return patternRuleTestWrapper.cleanXML(str); + } + + /** + * Ensure proper xml cleanining in PatternRuleTest + * + * @author Ionuț Păduraru + * @since 07.05.2009 21:11:30 + * @throws Exception + */ + public void testCleanXML() throws Exception { + assertEquals(cleanXML("1<mark>2"), "12"); + assertEquals(cleanXML("1</mark>2"), "12"); + assertEquals(cleanXML("1<</mark>2"), "1<2"); + assertEquals(cleanXML("<</mark>2"), "<2"); + assertEquals(cleanXML("></mark>2"), ">2"); + assertEquals(cleanXML("1<mark>abc</mark>2"), "1abc2"); + assertEquals(cleanXML("1<mark><<</mark>2"), "1<<2"); + assertEquals(cleanXML("1<mark>>></mark>2"), "1>>2"); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/SimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/SimpleReplaceRuleTest.java new file mode 100644 index 0000000..76d7549 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ro/SimpleReplaceRuleTest.java @@ -0,0 +1,153 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, 
or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.ro; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + +/** + * + * Simple tests for rules/ro/SimpleReplaceRule class + * + * @author Ionuț Păduraru + */ +public class SimpleReplaceRuleTest extends TestCase { + + private SimpleReplaceRule rule; + private JLanguageTool langTool; + + protected void setUp() throws Exception { + super.setUp(); + rule = new SimpleReplaceRule(TestTools.getMessages("ro")); + langTool = new JLanguageTool(Language.ROMANIAN); + } + + /** + * Make sure that the suggested word is not the same as the wrong word + */ + public void testInvalidSuggestion() { + List<String> invalidSuggestions = new ArrayList<String>(); + List<Map<String,String>> wrongWords = rule.getWrongWords(); + for (Map<String, String> ruleEntry : wrongWords) { + for (String fromWord : ruleEntry.keySet()) { + String toWord = ruleEntry.get(fromWord); + if (toWord == null || fromWord.equals(toWord)) { + invalidSuggestions.add(toWord); + } + } + } + if (!invalidSuggestions.isEmpty()) { + fail("Invalid suggestions found for: " + Arrays.toString(invalidSuggestions.toArray(new String[]{}))); + } + } + public 
void testRule() throws IOException { + + // correct sentences: + assertEquals(0, rule.match(langTool.getAnalyzedSentence("Paisprezece case.")).length); + + // incorrect sentences: + + // at the beginning of a sentence (Romanian replace rule is case-sensitive) + checkSimpleReplaceRule("Patrusprezece case.", "Paisprezece"); + // inside sentence + checkSimpleReplaceRule("Satul are patrusprezece case.", "paisprezece"); + checkSimpleReplaceRule("Satul are (patrusprezece) case.", "paisprezece"); + checkSimpleReplaceRule("Satul are «patrusprezece» case.", "paisprezece"); + + checkSimpleReplaceRule("El are șasesprezece ani.", "șaisprezece"); + checkSimpleReplaceRule("El a luptat pentru întâiele cărți.", "întâile"); + checkSimpleReplaceRule("El are cinsprezece cărți.", "cincisprezece"); + checkSimpleReplaceRule("El a fost patruzecioptist.", "pașoptist"); + checkSimpleReplaceRule("M-am adresat întâiei venite.", "întâii"); + checkSimpleReplaceRule("M-am adresat întâielor venite.", "întâilor"); + checkSimpleReplaceRule("A ajuns al douăzecelea.", "douăzecilea"); + checkSimpleReplaceRule("A ajuns al zecilea.", "zecelea"); + checkSimpleReplaceRule("A primit jumate de litru de lapte.", "jumătate"); + + // multiple words / compounds + // space-delimited + checkSimpleReplaceRule("aqua forte", "acvaforte"); + checkSimpleReplaceRule("aqua forte.", "acvaforte"); + checkSimpleReplaceRule("A folosit «aqua forte».", "acvaforte"); + checkSimpleReplaceRule("Aqua forte.", "Acvaforte"); + checkSimpleReplaceRule("este aqua forte", "acvaforte"); + checkSimpleReplaceRule("este aqua forte.", "acvaforte"); + checkSimpleReplaceRule("este Aqua Forte.", "Acvaforte"); + checkSimpleReplaceRule("este AquA Forte.", "Acvaforte"); + checkSimpleReplaceRule("A primit jumate de litru de lapte și este aqua forte.", "jumătate", "acvaforte"); + checkSimpleReplaceRule("du-te vino", "du-te-vino"); + // dash-delimited + checkSimpleReplaceRule("cou-boi", "cowboy"); + checkSimpleReplaceRule("cow-boy", "cowboy"); + 
checkSimpleReplaceRule("cau-boi", "cowboy"); + checkSimpleReplaceRule("Cau-boi", "Cowboy"); + checkSimpleReplaceRule("cowboy"); // correct, no replacement + checkSimpleReplaceRule("Iată un cau-boi", "cowboy"); + checkSimpleReplaceRule("Iată un cau-boi.", "cowboy"); + checkSimpleReplaceRule("Iată un (cau-boi).", "cowboy"); + checkSimpleReplaceRule("văcar=cau-boi", "cowboy"); + + + // multiple suggestions + checkSimpleReplaceRule("A fost adăogită o altă regulă.", "adăugită/adăugată"); + checkSimpleReplaceRule("A venit adinioarea.", "adineaori/adineauri"); + + // words with multiple wrong forms + checkSimpleReplaceRule("A pus axterix.", "asterisc"); + checkSimpleReplaceRule("A pus axterics.", "asterisc"); + checkSimpleReplaceRule("A pus asterics.", "asterisc"); + } + + /** + * Check if a specific replace rule applies. + * + * @param sentence + * the sentence containing the incorrect/misspeled word. + * @param words + * the words that are correct (the suggested replacement). Use "/" to separate multiple forms. + * @throws IOException + */ + private void checkSimpleReplaceRule(String sentence, String... 
words) + throws IOException { + RuleMatch[] matches; + matches = rule.match(langTool.getAnalyzedSentence(sentence)); + assertEquals("Invalid matches.length while checking sentence: " + + sentence, words.length, matches.length); + for (int i = 0; i < words.length; i++) { + String word = words[i]; + String[] replacements = word.split("\\/"); + assertEquals("Invalid replacement count wile checking sentence: " + + sentence, replacements.length, matches[i].getSuggestedReplacements().size()); + for (int j = 0; j < replacements.length; j++) { + assertEquals("Invalid suggested replacement while checking sentence: " + + sentence, replacements[j], matches[i].getSuggestedReplacements().get(j)); + } + } + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRuleTest.java new file mode 100644 index 0000000..7b293e2 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRuleTest.java @@ -0,0 +1,55 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.ru; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + +/* + * RuSimpleReplaceRuleTest + * @ author Yakov Reztsov + * + */ + + +public class RuSimpleReplaceRuleTest extends TestCase { + + public void testRule() throws IOException { + RuSimpleReplaceRule rule = new RuSimpleReplaceRule(TestTools.getMessages("ru")); + + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.RUSSIAN); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("Книга была порвана.")); + assertEquals(0, matches.length); + + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("Книга была порвата.")); + assertEquals(1, matches.length); + assertEquals(1, matches[0].getSuggestedReplacements().size()); + assertEquals("порвана", matches[0].getSuggestedReplacements().get(0)); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianCompoundRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianCompoundRuleTest.java new file mode 100644 index 0000000..a4552f8 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianCompoundRuleTest.java @@ -0,0 +1,62 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * 
version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.rules.ru; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.CompoundRuleTestAbs; + +/** + * Russian Compound rule test + * @author Yakov Reztsov + * Based on German Compound rule test + * @author Daniel Naber + */ +public class RussianCompoundRuleTest extends CompoundRuleTestAbs { + + protected void setUp() throws Exception { + super.setUp(); + langTool = new JLanguageTool(Language.RUSSIAN); + rule = new RussianCompoundRule(null); + } + + public void testRule() throws IOException { + // correct sentences: + check(0, "Он вышел из-за дома."); + // Both suggestion for some words: + check(0, "естественно-научный"); + // incorrect sentences: + check(1, "из за", new String[]{"из-за"}); + check(1, "нет нет из за да да"); + //FIXME: suggestions / longest match + check(1, "Ростов на Дону", new String[]{"Ростов-на-Дону"}); + // no hyphen suggestion for some words: + check(1, "кругло суточный", new String[]{"круглосуточный"}); + // also accept incorrect upper/lowercase spelling: + check(1, "Ростов на дону", new String[]{"Ростов-на-дону"}); + // also detect an error if only some of the hyphens are missing: + check(1, "Ростов-на Дону", new String[]{"Ростов-на-Дону"}); + // first part is a single character: + check(0, "во-первых"); + check(1, "во первых", new 
String[]{"во-первых"}); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRuleTest.java new file mode 100644 index 0000000..8375efc --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRuleTest.java @@ -0,0 +1,56 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.ru; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; +import junit.framework.TestCase; + +public class RussianUnpairedBracketsRuleTest extends TestCase { + + public void testRulePolish() throws IOException { + RussianUnpairedBracketsRule rule = new RussianUnpairedBracketsRule(TestTools + .getEnglishMessages(), Language.RUSSIAN); + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.RUSSIAN); + // correct sentences: + matches = rule.match(langTool + .getAnalyzedSentence("(О жене и детях не беспокойся, я беру их на свои руки).")); + assertEquals(0, matches.length); + // correct sentences: + matches = rule + .match(langTool + .getAnalyzedSentence("Позже выходит другая «южная поэма» «Бахчисарайский фонтан» (1824).")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("А \"б\" Д.")); + assertEquals(0, matches.length); + matches = rule.match(langTool.getAnalyzedSentence("а), б), Д)..., ДД), аа) и 1а)")); + assertEquals(0, matches.length); + // incorrect sentences: + matches = rule.match(langTool + .getAnalyzedSentence("В таком ключе был начат в мае 1823 в Кишинёве роман в стихах «Евгений Онегин.")); + assertEquals(1, matches.length); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/PunctuationCheckRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/PunctuationCheckRuleTest.java new file mode 100644 index 0000000..e1f2fd0 --- /dev/null +++ 
b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/PunctuationCheckRuleTest.java @@ -0,0 +1,72 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.uk; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + +public class PunctuationCheckRuleTest extends TestCase { + + public void testRule() throws IOException { + PunctuationCheckRule rule = new PunctuationCheckRule(TestTools.getEnglishMessages()); + + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.UKRAINIAN); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("Дві, коми. 
Ось: дві!!!")); + assertEquals(0, matches.length); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("- Це ваша пряма мова?!!")); + assertEquals(0, matches.length); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("Дві,- коми!..")); + assertEquals(0, matches.length); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("Два пробіли.")); // поки що ігноруємо - не царська це справа :) + assertEquals(0, matches.length); + + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("Дві крапки..")); + assertEquals(1, matches.length); + assertEquals(1, matches[0].getSuggestedReplacements().size()); + assertEquals(".", matches[0].getSuggestedReplacements().get(0)); + + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("Дві,, коми.")); + assertEquals(1, matches.length); + + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("Не там ,кома.")); + assertEquals(1, matches.length); + + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("Двокрапка:- з тире.")); + assertEquals(1, matches.length); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/SimpleReplaceRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/SimpleReplaceRuleTest.java new file mode 100644 index 0000000..98822af --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/uk/SimpleReplaceRuleTest.java @@ -0,0 +1,49 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.rules.uk; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.rules.RuleMatch; + + +public class SimpleReplaceRuleTest extends TestCase { + + public void testRule() throws IOException { + SimpleReplaceRule rule = new SimpleReplaceRule(TestTools.getEnglishMessages()); + + RuleMatch[] matches; + JLanguageTool langTool = new JLanguageTool(Language.UKRAINIAN); + + // correct sentences: + matches = rule.match(langTool.getAnalyzedSentence("Ці рядки повинні збігатися.")); + assertEquals(0, matches.length); + + // incorrect sentences: + matches = rule.match(langTool.getAnalyzedSentence("Ці рядки повинні співпадати.")); + assertEquals(1, matches.length); + assertEquals(1, matches[0].getSuggestedReplacements().size()); + assertEquals("збігатися", matches[0].getSuggestedReplacements().get(0)); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/server/HTTPServerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/server/HTTPServerTest.java new file mode 100644 index 0000000..1255bcd --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/server/HTTPServerTest.java @@ -0,0 +1,117 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is 
free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.server; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.net.URL; +import java.net.URLConnection; +import java.net.URLEncoder; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.XMLValidator; +import de.danielnaber.languagetool.tools.StringTools; + +public class HTTPServerTest extends TestCase { + + public void testHTTPServer() { + HTTPServer server = new HTTPServer(); + try { + server.run(); + // no error: + String enc = "UTF-8"; + assertEquals("<?xml version=\"1.0\" encoding=\""+enc+"\"?>\n<matches>\n</matches>\n", check(Language.GERMAN, "")); + assertEquals("<?xml version=\"1.0\" encoding=\""+enc+"\"?>\n<matches>\n</matches>\n", check(Language.GERMAN, "Ein kleiner test")); + // one error: + assertTrue(check(Language.GERMAN, "ein kleiner test").indexOf("UPPERCASE_SENTENCE_START") != -1); + // two errors: + String result = check(Language.GERMAN, "ein kleiner test. 
Und wieder Erwarten noch was: \u00f6\u00e4\u00fc\u00df."); + assertTrue(result.indexOf("UPPERCASE_SENTENCE_START") != -1); + assertTrue(result.indexOf("WIEDER_WILLEN") != -1); + assertTrue("Expected special chars, got: '" + result+ "'", + result.indexOf("\u00f6\u00e4\u00fc\u00df") != -1); // special chars are intact + XMLValidator validator = new XMLValidator(); + validator.validateXMLString(result, JLanguageTool.getDataBroker().getResourceDir() + "/api-output.dtd", "matches"); + validator.checkSimpleXMLString(result); + //System.err.println(result); + // make sure XML chars are escaped in the result to avoid invalid XML + // and XSS attacks: + assertTrue(check(Language.GERMAN, "bla <script>").indexOf("<script>") == -1); + + // other tests for special characters + String germanSpecialChars = check(Language.GERMAN, "ein kleiner test. Und wieder Erwarten noch was: öäüß öäüß."); + assertTrue("Expected special chars, got: '" + germanSpecialChars+ "'", germanSpecialChars.contains("öäüß")); + String romanianSpecialChars = check(Language.ROMANIAN, "bla bla șțîâă șțîâă și câteva caractere speciale"); + assertTrue("Expected special chars, got: '" + romanianSpecialChars+ "'", romanianSpecialChars.contains("șțîâă")); + String polishSpecialChars = check(Language.POLISH, "Mówiła długo, żeby tylko mówić mówić długo."); + assertTrue("Expected special chars, got: '" + polishSpecialChars+ "'", polishSpecialChars.contains("mówić")); + // test http POST + assertTrue(checkByPOST(Language.ROMANIAN, "greșit greșit").indexOf("greșit") != -1); + // test supported language listing + URL url = new URL("http://localhost:" + HTTPServer.DEFAULT_PORT + "/Languages"); + String languagesXML = StringTools.streamToString((InputStream)url.getContent()); + if (!languagesXML.contains("Romanian") || !languagesXML.contains("English")) + fail("Error getting supported languages: " + languagesXML); + // tests for "&" character + assertTrue(check(Language.ENGLISH, "Me & you you").contains("&")); + // tests 
for mother tongue (copy from link {@link FalseFriendRuleTest}) + assertTrue(check(Language.ENGLISH, Language.GERMAN, "We will berate you").indexOf("BERATE") != -1); + assertTrue(check(Language.GERMAN, Language.ENGLISH, "Man sollte ihn nicht so beraten.").indexOf("BERATE") != -1); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + server.stop(); + } + } + + private String check(Language lang, String text) throws IOException { + return check(lang, null, text); + } + + private String check(Language lang, Language motherTongue, String text) throws IOException { + String urlOptions = "/?language=" + lang.getShortName(); + urlOptions += "&text=" + URLEncoder.encode(text, "UTF-8"); // latin1 is not enough for languages like polish, romanian, etc + if (null != motherTongue) + urlOptions += "&motherTongue="+motherTongue.getShortName(); + URL url = new URL("http://localhost:" + HTTPServer.DEFAULT_PORT + urlOptions); + InputStream stream = (InputStream)url.getContent(); + String result = StringTools.streamToString(stream); + return result; + } + + /** + * Same as {@link #check(Language, String)} but using HTTP POST method instead of GET + */ + private String checkByPOST(Language lang, String text) throws IOException { + String postData = "language=" + lang.getShortName(); + postData += "&text=" + URLEncoder.encode(text, "UTF-8"); // latin1 is not enough for languages like polish, romanian, etc + URL url = new URL("http://localhost:" + HTTPServer.DEFAULT_PORT); + URLConnection connection = url.openConnection(); + connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); + wr.write(postData); + wr.flush(); + String result = StringTools.streamToString(connection.getInputStream()); + return result; + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizerTest.java 
new file mode 100644 index 0000000..987038e --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizerTest.java @@ -0,0 +1,51 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.synthesis.en; + +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; + +public class EnglishSynthesizerTest extends TestCase { + + private final AnalyzedToken dummyToken(String tokenStr) { + return new AnalyzedToken(tokenStr, tokenStr, tokenStr); + } + public final void testSynthesizeStringString() throws IOException { + EnglishSynthesizer synth = new EnglishSynthesizer(); + assertEquals(synth.synthesize(dummyToken("blablabla"), + "blablabla").length, 0); + + assertEquals("[were, was]", Arrays.toString(synth.synthesize(dummyToken("be"), "VBD"))); + assertEquals("[presidents]", Arrays.toString(synth.synthesize(dummyToken("president"), "NNS"))); + assertEquals("[tested]", Arrays.toString(synth.synthesize(dummyToken("test"), "VBD"))); + assertEquals("[tested]", Arrays.toString(synth.synthesize(dummyToken("test"), 
"VBD", false))); + //with regular expressions + assertEquals("[tested]", Arrays.toString(synth.synthesize(dummyToken("test"), "VBD", true))); + assertEquals("[tested, testing]", Arrays.toString(synth.synthesize(dummyToken("test"), "VBD|VBG", true))); + //with special indefinite article + assertEquals("[a university, the university]", Arrays.toString(synth.synthesize(dummyToken("university"), "+DT", false))); + assertEquals("[an hour, the hour]", Arrays.toString(synth.synthesize(dummyToken("hour"), "+DT", false))); + assertEquals("[an hour]", Arrays.toString(synth.synthesize(dummyToken("hour"), "+INDT", false))); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizerTest.java new file mode 100644 index 0000000..4558d75 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizerTest.java @@ -0,0 +1,46 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.synthesis.es; + +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; + +public class SpanishSynthesizerTest extends TestCase { + private final AnalyzedToken dummyToken(String tokenStr) { + return new AnalyzedToken(tokenStr, tokenStr, tokenStr); + } + public final void testSynthesizeStringString() throws IOException { + SpanishSynthesizer synth = new SpanishSynthesizer(); + assertEquals(synth.synthesize(dummyToken("blablabla"), + "blablabla").length, 0); + + assertEquals("[temiera, temiese]", Arrays.toString(synth.synthesize(dummyToken("temer"), "VMSI3S0"))); + assertEquals("[presidentes]", Arrays.toString(synth.synthesize(dummyToken("presidente"), "NCMP000"))); + assertEquals("[contéis]", Arrays.toString(synth.synthesize(dummyToken("contar"), "VMSP2P0"))); + assertEquals("[probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM"))); + assertEquals("[probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM", false))); + //with regular expressions + assertEquals("[probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM", true))); + assertEquals("[probando, probado]", Arrays.toString(synth.synthesize(dummyToken("probar"), "VMP00SM|VMG0000", true))); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizerTest.java new file mode 100644 index 0000000..8f206c1 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizerTest.java @@ -0,0 +1,46 @@ +/* LanguageTool, a natural language style 
checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.synthesis.nl; + +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; + +public class DutchSynthesizerTest extends TestCase { + + private final AnalyzedToken dummyToken(String tokenStr) { + return new AnalyzedToken(tokenStr, tokenStr, tokenStr); + } + public final void testSynthesizeStringString() throws IOException { + DutchSynthesizer synth = new DutchSynthesizer(); + assertEquals(synth.synthesize(dummyToken("blablabla"), + "blablabla").length, 0); + + assertEquals("[zwommen]", Arrays.toString(synth.synthesize(dummyToken("zwemmen"), "VBh"))); + assertEquals("[Afro-Surinamers]", Arrays.toString(synth.synthesize(dummyToken("Afro-Surinamer"), "NN2"))); + assertEquals("[hebt, heeft]", Arrays.toString(synth.synthesize(dummyToken("hebben"), "VB3", true))); + //with regular expressions + assertEquals("[doorgeseind]", Arrays.toString(synth.synthesize(dummyToken("doorseinen"), "VBp", true))); + assertEquals("[doorsein, doorseint, doorseinden, doorseinde, doorseinen, doorgeseind, doorgeseinde]", 
Arrays.toString(synth.synthesize(dummyToken("doorseinen"), "VB.*", true))); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizerTest.java new file mode 100644 index 0000000..00092d0 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizerTest.java @@ -0,0 +1,49 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.synthesis.pl; + +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; + +public class PolishSynthesizerTest extends TestCase { + private AnalyzedToken dummyToken(String tokenStr) { + return new AnalyzedToken(tokenStr, tokenStr, tokenStr); + } + + public final void testSynthesizeString() throws IOException { + PolishSynthesizer synth = new PolishSynthesizer(); + assertEquals(synth.synthesize(dummyToken("blablabla"), "blablabla").length, 0); + + assertEquals("[Aaru]", Arrays.toString(synth.synthesize(dummyToken("Aar"), "subst:sg:gen:m3"))); + assertEquals("[Abchazem]", Arrays.toString(synth.synthesize(dummyToken("Abchaz"), "subst:sg:inst:m1"))); + assertEquals("[nieduży]", Arrays.toString(synth.synthesize(dummyToken("duży"), "adj:sg:nom:m:pos:neg"))); + assertEquals("[miała]", Arrays.toString(synth.synthesize(dummyToken("mieć"), "verb:praet:sg:ter:f:perf"))); + assertEquals("[brzydziej]", Arrays.toString(synth.synthesize(dummyToken("brzydko"), "adv:comp"))); + //with regular expressions + assertEquals("[tonera]", Arrays.toString(synth.synthesize(dummyToken("toner"), ".*sg.*[\\.:]gen.*", true))); + assertEquals("[niedużego, nieduży, niedużemu, niedużego, niedużym, nieduży, nieduży]", Arrays.toString(synth.synthesize(dummyToken("duży"), "adj:sg.*(m[0-9]?|m.n):pos:neg", true))); + assertEquals("[miałabym, miałbym, miałabyś, miałbyś, miałaby, miałby, miałoby, miałam, miałem, miałaś, miałeś, miała, miał, miało]", + Arrays.toString(synth.synthesize(dummyToken("mieć"), ".*praet:sg.*", true))); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizerTest.java 
b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizerTest.java new file mode 100644 index 0000000..1cfc572 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizerTest.java @@ -0,0 +1,83 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.synthesis.ro; + +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; + +public class RomanianSynthesizerTest extends TestCase { + + private final AnalyzedToken dummyToken(String tokenStr) { + return new AnalyzedToken(tokenStr, tokenStr, tokenStr); + } + + /** + * + * @author Ionuț Păduraru + * @since 08.03.2009 18:44:25 + * @throws IOException + */ + public final void testSynthesizeStringString() throws IOException { + RomanianSynthesizer synth = new RomanianSynthesizer(); + assertEquals(synth.synthesize(dummyToken("blablabla"), "blablabla").length, 0); + + // a alege + // forma de infinitiv + assertEquals("[alege]", Arrays.toString(synth.synthesize( + dummyToken("alege"), "V000000f00"))); + // conjunctiv, pers a doua 
plural + assertEquals("[alegeți]", Arrays.toString(synth.synthesize( + dummyToken("alege"), "V0p2000cz0"))); + + // a fi + assertEquals("[fi]", Arrays.toString(synth.synthesize( + dummyToken("fi"), "V000000f0f"))); + // indicativ prezent, pers a doua plural + assertEquals("[sunteți]", Arrays.toString(synth.synthesize( + dummyToken("fi"), "V0p2000izf"))); + // indicativ prezent, pers a treia plural + assertEquals("[sunt]", Arrays.toString(synth.synthesize( + dummyToken("fi"), "V0p3000izf"))); + // indicativ prezent, pers întâi plural + assertEquals("[sunt]", Arrays.toString(synth.synthesize( + dummyToken("fi"), "V0s1000izf"))); + // RegExp + // indicativ prezent, pers a doua plural SAU indicativ prezent, pers a treia plural + assertEquals("[sunteți, sunt]", Arrays.toString(synth.synthesize( + dummyToken("fi"), "V0p2000izf|V0p3000izf", true))); + + // diverse + // indicativ, mai mult ca perfect, persoana întâi, plural + assertEquals("[merseserăm]", Arrays.toString(synth.synthesize( + dummyToken("merge"), "V0p1000im0"))); + // indicativ, mai mult ca perfect, persoana întâi, singular + assertEquals("[mersesem]", Arrays.toString(synth.synthesize( + dummyToken("merge"), "V0s1000im0"))); + assertEquals("[legătura]", Arrays.toString(synth.synthesize( + dummyToken("legătură"), "Sfs3aac000"))); + assertEquals("[legătură]", Arrays.toString(synth.synthesize( + dummyToken("legătură"), "Sfs3anc000"))); + + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizerTest.java new file mode 100644 index 0000000..4530597 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizerTest.java @@ -0,0 +1,43 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under 
the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.synthesis.sk; + +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; + +public class SlovakSynthesizerTest extends TestCase { + + private final AnalyzedToken dummyToken(String tokenStr) { + return new AnalyzedToken(tokenStr, tokenStr, tokenStr); + } + public final void testSynthesizeStringString() throws IOException { + SlovakSynthesizer synth = new SlovakSynthesizer(); + assertEquals(synth.synthesize(dummyToken("blablabla"), + "blablabla").length, 0); + + assertEquals("[časopisu]", Arrays.toString(synth.synthesize(dummyToken("časopis"), "SSis2"))); + //with regular expressions + assertEquals("[časopisy, časopisov, časopisom, časopisy, časopisy, časopisoch, časopismi, časopis, časopisu, časopisu, časopis, časopis, časopise, časopisom]", Arrays.toString(synth.synthesize(dummyToken("časopis"), "SS.*", true))); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ManualTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ManualTaggerTest.java new file mode 100644 index 0000000..432e8da --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ManualTaggerTest.java @@ -0,0 +1,48 @@ +/* LanguageTool, a natural 
language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging; + +import java.io.IOException; +import java.util.Arrays; + +import de.danielnaber.languagetool.JLanguageTool; + +import junit.framework.TestCase; + +/** + * @author Daniel Naber + */ +public class ManualTaggerTest extends TestCase { + + private static final String MANUAL_DICT_FILENAME = "/de/added.txt"; + + public void testManualTagger() throws IOException { + ManualTagger mt = new ManualTagger(JLanguageTool.getDataBroker().getFromResourceDirAsStream(MANUAL_DICT_FILENAME)); + assertNull(mt.lookup("")); + assertNull(mt.lookup("gibtsnicht")); + + assertEquals("[Trotz, SUB:NOM:SIN:MAS]", Arrays.toString(mt.lookup("Trotz"))); + // lookup is case sensitive: + assertNull(mt.lookup("trotz")); + + assertEquals("[Interesse, SUB:NOM:PLU:NEU, Interesse, SUB:AKK:PLU:NEU, Interesse, SUB:GEN:PLU:NEU]", + Arrays.toString(mt.lookup("Interessen"))); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ca/CatalanTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ca/CatalanTaggerTest.java new file mode 100644 index 0000000..95b37ce --- /dev/null +++ 
b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ca/CatalanTaggerTest.java @@ -0,0 +1,60 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.ca; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class CatalanTaggerTest extends TestCase { + + private CatalanTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new CatalanTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + 
public void testTagger() throws IOException { + TestTools.myAssert("Sóc un home molt honrat.", + "Sóc/[ser]VSIP1S0 -- un/[un]DI0MS0|un/[un]PI0MS000 -- home/[home]I|home/[home]NCMS000 -- molt/[molt]DI0MS0|molt/[molt]PI0MS000|molt/[molt]RG -- honrat/[honrar]VMP00SM", tokenizer, tagger); +// Need to fix the separator character: al - a+el+SP+DA +// TestTools.myAssert("Frase recitada al matí.", +// "Frase/[frase]NCFS000 -- recitada/[recitar]VMP00SF -- al/[a]el+SP+DA -- matí/[matar]VMIS1S0|[matí]NCMS000", tokenizer, tagger); + TestTools.myAssert("blablabla","blablabla/[null]null", tokenizer, tagger); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/cs/CzechTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/cs/CzechTaggerTest.java new file mode 100644 index 0000000..7aa2e9b --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/cs/CzechTaggerTest.java @@ -0,0 +1,59 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.cs; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class CzechTaggerTest extends TestCase { + + private CzechTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new CzechTagger(); + tokenizer = new WordTokenizer(); + } + +/* public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + }*/ + + public void testTagger() throws IOException { + //TestTools.myAssert("Ukončuje větu rozkazovací či zvolací.", "Ukončuje/[ukončovat]k5eAaImIp3nS větu/[věta]k1gFnSc4 rozkazovací/[rozkazovací]k2eAgFnPc1d1 či/[či]k8 zvolací/[zvolací]k2eAgFnPc1d1", tokenizer, tagger); + //TestTools.myAssert("Nahrazuje vynechané písmeno, používá se pro zkracování letopočtů.", "Nahrazuje/[nahrazovat]k5eAaImIp3nS vynechané/[vynechaný]k2eAgFnPc1d1 písmeno/[písmeno]k1gNnSc1|písmeno/[písmena]k1gFnSc5 používá/[používat]k5eAaImIp3nS se/[se]k3c4 pro/[pro]k7 zkracování/[zkracování]k1gNnPc1 letopočtů/[letopočet]k1gInPc2", tokenizer, tagger); + + TestTools.myAssert("Nejkratší věta.", + 
"Nejkratší/[krátký]k2eAgFnPc1d3|Nejkratší/[krátký]k2eAgFnPc4d3|Nejkratší/[krátký]k2eAgFnPc5d3|Nejkratší/[krátký]k2eAgFnSc1d3|Nejkratší/[krátký]k2eAgFnSc2d3|Nejkratší/[krátký]k2eAgFnSc3d3|Nejkratší/[krátký]k2eAgFnSc4d3|Nejkratší/[krátký]k2eAgFnSc5d3|Nejkratší/[krátký]k2eAgFnSc6d3|Nejkratší/[krátký]k2eAgFnSc7d3|Nejkratší/[krátký]k2eAgInPc1d3|Nejkratší/[krátký]k2eAgInPc4d3|Nejkratší/[krátký]k2eAgInPc5d3|Nejkratší/[krátký]k2eAgInSc1d3|Nejkratší/[krátký]k2eAgInSc4d3|Nejkratší/[krátký]k2eAgInSc5d3|Nejkratší/[krátký]k2eAgMnPc1d3|Nejkratší/[krátký]k2eAgMnPc4d3|Nejkratší/[krátký]k2eAgMnPc5d3|Nejkratší/[krátký]k2eAgMnSc1d3|Nejkratší/[krátký]k2eAgMnSc5d3|Nejkratší/[krátký]k2eAgNnPc1d3|Nejkratší/[krátký]k2eAgNnPc4d3|Nejkratší/[krátký]k2eAgNnPc5d3|Nejkratší/[krátký]k2eAgNnSc1d3|Nejkratší/[krátký]k2eAgNnSc4d3|Nejkratší/[krátký]k2eAgNnSc5d3 -- věta/[věta]k1gFnSc1", tokenizer, tagger); + TestTools.myAssert("zvolací.", + "zvolací/[zvolací]k2eAgFnPc1d1|zvolací/[zvolací]k2eAgFnPc4d1|zvolací/[zvolací]k2eAgFnPc5d1|zvolací/[zvolací]k2eAgFnSc1d1|zvolací/[zvolací]k2eAgFnSc2d1|zvolací/[zvolací]k2eAgFnSc3d1|zvolací/[zvolací]k2eAgFnSc4d1|zvolací/[zvolací]k2eAgFnSc5d1|zvolací/[zvolací]k2eAgFnSc6d1|zvolací/[zvolací]k2eAgFnSc7d1|zvolací/[zvolací]k2eAgInPc1d1|zvolací/[zvolací]k2eAgInPc4d1|zvolací/[zvolací]k2eAgInPc5d1|zvolací/[zvolací]k2eAgInSc1d1|zvolací/[zvolací]k2eAgInSc4d1|zvolací/[zvolací]k2eAgInSc5d1|zvolací/[zvolací]k2eAgMnPc1d1|zvolací/[zvolací]k2eAgMnPc4d1|zvolací/[zvolací]k2eAgMnPc5d1|zvolací/[zvolací]k2eAgMnSc1d1|zvolací/[zvolací]k2eAgMnSc5d1|zvolací/[zvolací]k2eAgNnPc1d1|zvolací/[zvolací]k2eAgNnPc4d1|zvolací/[zvolací]k2eAgNnPc5d1|zvolací/[zvolací]k2eAgNnSc1d1|zvolací/[zvolací]k2eAgNnSc4d1|zvolací/[zvolací]k2eAgNnSc5d1", tokenizer, tagger); + TestTools.myAssert("blablabla", "blablabla/[null]null", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/de/GermanTaggerTest.java 
b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/de/GermanTaggerTest.java new file mode 100644 index 0000000..7bf3c14 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/de/GermanTaggerTest.java @@ -0,0 +1,117 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.de; + +import java.io.IOException; + +import de.danielnaber.languagetool.JLanguageTool; + +import junit.framework.TestCase; +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +/** + * @author Daniel Naber + */ +public class GermanTaggerTest extends TestCase { + + public void testTagger() throws IOException { + GermanTagger tagger = new GermanTagger(); + AnalyzedGermanTokenReadings aToken = tagger.lookup("Haus"); + assertEquals("Haus[SUB:AKK:SIN:NEU, SUB:DAT:SIN:NEU, SUB:NOM:SIN:NEU]", aToken.toSortedString()); + assertEquals("Haus", aToken.getReadings().get(0).getLemma()); + assertEquals("Haus", aToken.getReadings().get(1).getLemma()); + assertEquals("Haus", aToken.getReadings().get(2).getLemma()); + + aToken = tagger.lookup("Hauses"); + 
assertEquals("Hauses[SUB:GEN:SIN:NEU]", aToken.toSortedString()); + assertEquals("Haus", aToken.getReadings().get(0).getLemma()); + + aToken = tagger.lookup("hauses"); + assertNull(aToken); + + aToken = tagger.lookup("Groß"); + assertNull(aToken); + + aToken = tagger.lookup("großer"); + assertEquals("großer[ADJ:DAT:SIN:FEM:GRU:SOL, ADJ:GEN:PLU:FEM:GRU:SOL, " + + "ADJ:GEN:PLU:MAS:GRU:SOL, ADJ:GEN:PLU:NEU:GRU:SOL, " + + "ADJ:GEN:SIN:FEM:GRU:SOL, ADJ:NOM:SIN:MAS:GRU:IND, ADJ:NOM:SIN:MAS:GRU:SOL]", aToken.toSortedString()); + assertEquals("groß", aToken.getReadings().get(0).getLemma()); + + // from both german.dict and added.txt: + aToken = tagger.lookup("Interessen"); + assertEquals("Interessen[SUB:AKK:PLU:NEU, SUB:DAT:PLU:NEU, SUB:GEN:PLU:NEU, SUB:NOM:PLU:NEU]", + aToken.toSortedString()); + assertEquals("Interesse", aToken.getReadings().get(0).getLemma()); + assertEquals("Interesse", aToken.getReadings().get(1).getLemma()); + assertEquals("Interesse", aToken.getReadings().get(2).getLemma()); + assertEquals("Interesse", aToken.getReadings().get(3).getLemma()); + + // words that are not in the dictionary but that are recognized thanks to noun splitting: + aToken = tagger.lookup("Donaudampfschiff"); + assertEquals("Donaudampfschiff[SUB:AKK:SIN:NEU, SUB:DAT:SIN:NEU, SUB:NOM:SIN:NEU]", + aToken.toSortedString()); + assertEquals("Donaudampfschiff", aToken.getReadings().get(0).getLemma()); + assertEquals("Donaudampfschiff", aToken.getReadings().get(1).getLemma()); + + aToken = tagger.lookup("Häuserkämpfe"); + assertEquals("Häuserkämpfe[SUB:AKK:PLU:MAS, SUB:GEN:PLU:MAS, SUB:NOM:PLU:MAS]", + aToken.toSortedString()); + assertEquals("Häuserkampf", aToken.getReadings().get(0).getLemma()); + assertEquals("Häuserkampf", aToken.getReadings().get(1).getLemma()); + assertEquals("Häuserkampf", aToken.getReadings().get(2).getLemma()); + + aToken = tagger.lookup("Häuserkampfes"); + assertEquals("Häuserkampfes[SUB:GEN:SIN:MAS]", aToken.toSortedString()); + assertEquals("Häuserkampf", 
aToken.getReadings().get(0).getLemma()); + + aToken = tagger.lookup("Häuserkampfs"); + assertEquals("Häuserkampfs[SUB:GEN:SIN:MAS]", aToken.toSortedString()); + assertEquals("Häuserkampf", aToken.getReadings().get(0).getLemma()); + + aToken = tagger.lookup("Lieblingsfarben"); + assertEquals("Lieblingsfarben[SUB:AKK:PLU:FEM, SUB:DAT:PLU:FEM, SUB:GEN:PLU:FEM, " + + "SUB:NOM:PLU:FEM]", aToken.toSortedString()); + assertEquals("Lieblingsfarbe", aToken.getReadings().get(0).getLemma()); + + aToken = tagger.lookup("Autolieblingsfarben"); + assertEquals("Autolieblingsfarben[SUB:AKK:PLU:FEM, SUB:DAT:PLU:FEM, SUB:GEN:PLU:FEM, " + + "SUB:NOM:PLU:FEM]", aToken.toSortedString()); + assertEquals("Autolieblingsfarbe", aToken.getReadings().get(0).getLemma()); + + aToken = tagger.lookup("übrigbleibst"); + assertEquals("übrigbleibst[VER:2:SIN:PRÄ:NON:NEB]", aToken.toSortedString()); + assertEquals("übrigbleiben", aToken.getReadings().get(0).getLemma()); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + JLanguageTool.getDataBroker().getFromResourceDirAsUrl("/de/german.dict")); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() + + " lacks a POS tag in the dictionary."); + } + } + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunkerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunkerTest.java new file mode 100644 index 0000000..24f83f8 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunkerTest.java @@ -0,0 +1,62 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute 
it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tagging.disambiguation.pl; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tagging.pl.PolishTagger; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class PolishChunkerTest extends TestCase { + + private PolishTagger tagger; + private WordTokenizer tokenizer; + private SentenceTokenizer sentenceTokenizer; + private PolishChunker disambiguator; + + public void setUp() { + tagger = new PolishTagger(); + tokenizer = new WordTokenizer(); + sentenceTokenizer = new SRXSentenceTokenizer("pl"); + disambiguator = new PolishChunker(); + } + + public void testChunker() throws IOException { + //TestTools.myAssert("To jest duży dom.", "/[null]SENT_START To/[to]conj|To/[ten]adj:sg:nom.acc.voc:n1.n2 /[null]null jest/[być]verb:fin:sg:ter:imperf /[null]null duży/[duży]adj:sg:nom:m:pneg /[null]null dom/[dom]subst:sg:nom.acc:m3 ./[null]SENT_END", tokenizer, sentenceTokenizer, tagger, disambiguator); + //TestTools.myAssert("Krowa pasie się na pastwisku.", "/[null]SENT_START Krowa/[krowa]subst:sg:nom:f 
/[null]null pasie/[pas]subst:sg:loc.voc:m3|pasie/[paść]verb:irreg /[null]null się/[siebie]qub /[null]null na/[na]prep:acc.loc /[null]null pastwisku/[pastwisko]subst:sg:dat:n+subst:sg:loc:n ./[null]SENT_END", tokenizer, sentenceTokenizer, tagger, disambiguator); + //TestTools.myAssert("blablabla","/[null]SENT_START blablabla/[null]SENT_END", tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("To test... dezambiguacji", + "/[null]SENT_START To/[ten]adj:sg:acc.nom.voc:n:pos|To/[to]conj /[null]null test/[test]subst:sg:acc.nom:m3|test/[testo]subst:pl:gen:n ./[...]<ELLIPSIS> ./[null]null ./[...]</ELLIPSIS> /[null]null dezambiguacji/[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("On, to znaczy premier, jest niezbyt mądry", + "/[null]SENT_START On/[on]ppron3:sg:nom:m:ter ,/[null]null /[null]null to/[ten]adj:sg:acc.nom.voc:n:pos|to/[to znaczy]<TO_ZNACZY>|to/[to]conj /[null]null znaczy/[to znaczy]</TO_ZNACZY>|znaczy/[znaczyć]verb:fin:sg:ter:imperf /[null]null premier/[premier]subst:sg:nom:m1|premier/[premiera]subst:pl:gen:f ,/[null]null /[null]null jest/[być]verb:fin:sg:ter:imperf /[null]null niezbyt/[zbyt]adv:neg /[null]null mądry/[mądry]adj:sg:acc:m3:pos|mądry/[mądry]adj:sg:acc:m3:pos:aff|mądry/[mądry]adj:sg:nom:m:pos|mądry/[mądry]adj:sg:nom:m:pos:aff|mądry/[mądry]adj:sg:voc:m1.m2:pos|mądry/[mądry]adj:sg:voc:m1.m2:pos:aff", tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Lubię go z uwagi na krótkie włosy.", + "/[null]SENT_START Lubię/[lubić]verb:fin:sg:pri:imperf /[null]null go/[on]ppron3:sg:acc:m:ter:nakc:npraep|go/[on]ppron3:sg:gen:m.n.n1.n2:ter:nakc:npraep /[null]null z/[z uwagi na]<PREP:ACC>|z/[z]prep:gen.inst /[null]null uwagi/[uwaga]subst:pl:acc.gen.nom.voc:f|uwagi/[uwaga]subst:sg:dat.gen.loc:f /[null]null na/[na]prep:acc.loc|na/[z uwagi na]</PREP:ACC> /[null]null krótkie/[krótki]adj:pl:acc.nom.voc:f.m2.m3.n:pos:aff|krótkie/[krótki]adj:sg:acc.nom.voc:n:pos:aff /[null]null 
włosy/[włos]subst:pl:acc.nom.voc:m3|włosy/[włosy]subst:pltant:acc.nom.voc:n ./[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Test...", + "/[null]SENT_START Test/[test]subst:sg:acc.nom:m3|Test/[testo]subst:pl:gen:n ./[...]<ELLIPSIS> ./[null]null ./[...]</ELLIPSIS>", tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Test... ", + "/[null]SENT_START Test/[test]subst:sg:acc.nom:m3|Test/[testo]subst:pl:gen:n ./[...]<ELLIPSIS> ./[null]null ./[...]</ELLIPSIS> /[null]null", tokenizer, sentenceTokenizer, tagger, disambiguator); + } + + } + diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleTest.java new file mode 100644 index 0000000..90af21d --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleTest.java @@ -0,0 +1,236 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tagging.disambiguation.rules; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Set; + +import javax.xml.parsers.ParserConfigurationException; + +import junit.framework.TestCase; + +import org.xml.sax.SAXException; + +import de.danielnaber.languagetool.AnalyzedSentence; +import de.danielnaber.languagetool.AnalyzedTokenReadings; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.xx.TrimDisambiguator; + +public class DisambiguationRuleTest extends TestCase { + + private static JLanguageTool langTool; + + @Override + public void setUp() throws IOException { + if (langTool == null) { + langTool = new JLanguageTool(Language.ENGLISH); + } + } + + public void testDisambiguationRulesFromXML() throws IOException, + ParserConfigurationException, SAXException { + testDisambiguationRulesFromXML(null, false); + } + + private void testDisambiguationRulesFromXML( + final Set<Language> ignoredLanguages, final boolean verbose) + throws IOException, ParserConfigurationException, SAXException { + for (final Language lang : Language.LANGUAGES) { + if (ignoredLanguages != null && ignoredLanguages.contains(lang)) { + if (verbose) { + System.out.println("Ignoring tests for " + lang.getName()); + } + continue; + } + if (verbose) { + System.out.println("Running tests for " + lang.getName() + "..."); + } + final DisambiguationRuleLoader ruleLoader = new DisambiguationRuleLoader(); + final JLanguageTool languageTool = new JLanguageTool(lang); + if (!(languageTool.getLanguage().getDisambiguator() 
instanceof DemoDisambiguator) + && !(languageTool.getLanguage().getDisambiguator() instanceof TrimDisambiguator)) { + final String name = JLanguageTool.getDataBroker().getResourceDir() + "/" + lang.getShortName() + + "/disambiguation.xml"; + final List<DisambiguationPatternRule> rules = ruleLoader + .getRules(ruleLoader.getClass().getResourceAsStream(name)); + testDisambiguationRulesFromXML(rules, languageTool, lang); + } + } + } + + static String combine(String[] s, String glue) { + int k=s.length; + if (k==0) + return null; + StringBuilder out=new StringBuilder(); + out.append(s[0]); + for (int x=1;x<k;++x) + out.append(glue).append(s[x]); + return out.toString(); + } + + + static String sortForms(final String wordForms) { + if (",[,]".equals(wordForms)) { + return wordForms; + } + String word = wordForms.substring(0, wordForms.indexOf('[') + 1); + String forms = wordForms.substring(wordForms.indexOf('[') + + 1, wordForms.length() -1); + String[] formToSort = forms.split(","); + Arrays.sort(formToSort); + return word + + combine(formToSort, ",") + + "]"; + } + + private void testDisambiguationRulesFromXML( + final List<DisambiguationPatternRule> rules, + final JLanguageTool languageTool, final Language lang) throws IOException { + for (final DisambiguationPatternRule rule : rules) { + final String id = rule.getId(); + if (rule.getUntouchedExamples() != null) { + final List<String> goodSentences = rule.getUntouchedExamples(); + for (String goodSentence : goodSentences) { + // enable indentation use + goodSentence = goodSentence.replaceAll("[\\n\\t]+", ""); + goodSentence = cleanXML(goodSentence); + + assertTrue(goodSentence.trim().length() > 0); + final AnalyzedSentence sent = disambiguateUntil(rules, id, + languageTool.getRawAnalyzedSentence(goodSentence)); + assertTrue("The untouched example for rule " + id + "was touched!", + sent.equals(rule.replace(sent))); + } + } + final List<DisambiguatedExample> examples = rule.getExamples(); + if (examples != null) { + 
for (final DisambiguatedExample example : examples) { + + final String outputForms = example.getDisambiguated(); + assertTrue("No input form found for: " + id, outputForms != null); + assertTrue(outputForms.trim().length() > 0); + final int expectedMatchStart = example.getExample().indexOf( + "<marker>"); + final int expectedMatchEnd = example.getExample() + .indexOf("</marker>") + - "<marker>".length(); + if (expectedMatchStart == -1 || expectedMatchEnd == -1) { + fail(lang + + ": No position markup ('<marker>...</marker>') in disambiguated example in rule " + + rule); + } + final String inputForms = example.getAmbiguous(); + assertTrue("No input form found for: " + id, inputForms != null); + assertTrue(inputForms.trim().length() > 0); + assertTrue("Input and output forms for rule " + id + "are the same!", + !outputForms.equals(inputForms)); + final AnalyzedSentence cleanInput = languageTool + .getRawAnalyzedSentence(cleanXML(example.getExample())); + final AnalyzedSentence sent = disambiguateUntil(rules, id, + languageTool + .getRawAnalyzedSentence(cleanXML(example.getExample()))); + final AnalyzedSentence disambiguatedSent = rule + .replace(disambiguateUntil(rules, id, languageTool + .getRawAnalyzedSentence(cleanXML(example.getExample())))); + assertTrue( + "Disambiguated sentence is equal to the non-disambiguated sentence for rule :" + + id, !cleanInput.equals(disambiguatedSent)); + assertTrue( + "Disambiguated sentence is equal to the input sentence for rule :" + + id, !sent.equals(disambiguatedSent)); + String reading = ""; + for (final AnalyzedTokenReadings readings : sent.getTokens()) { + if (readings.isSentStart() && inputForms.indexOf("<S>") == -1) { + continue; + } + if (readings.getStartPos() == expectedMatchStart) { + final AnalyzedTokenReadings r[] = { readings }; + reading = new AnalyzedSentence(r).toString(); + assertTrue( + "Wrong marker position in the example for the rule " + id, + readings.getStartPos() == expectedMatchStart + && 
readings.getStartPos() + readings.getToken().length() == expectedMatchEnd); + break; + } + } + assertTrue("The input form for the rule " + id + " in the example: " + + example.toString() + " is different than expected (expected " + + inputForms + " but got " + sortForms(reading) + ").", sortForms(reading) + .equals(inputForms)); + for (final AnalyzedTokenReadings readings : disambiguatedSent + .getTokens()) { + if (readings.isSentStart() && outputForms.indexOf("<S>") == -1) { + continue; + } + if (readings.getStartPos() == expectedMatchStart) { + final AnalyzedTokenReadings r[] = { readings }; + reading = new AnalyzedSentence(r).toString(); + assertTrue(readings.getStartPos() == expectedMatchStart + && readings.getStartPos() + readings.getToken().length() == expectedMatchEnd); + break; + } + } + assertTrue("The output form for the rule " + id + " in the example: " + + example.toString() + " is different than expected (expected " + + outputForms + " but got " + sortForms(reading) + ").", sortForms(reading) + .equals(outputForms)); + } + } + } + } + + // useful for testing the rule cascade + private static AnalyzedSentence disambiguateUntil( + final List<DisambiguationPatternRule> rules, final String ruleID, + final AnalyzedSentence sentence) throws IOException { + AnalyzedSentence disambiguated = sentence; + for (final DisambiguationPatternRule rule : rules) { + if (ruleID.equals(rule.getId())) { + break; + } + disambiguated = rule.replace(disambiguated); + } + return disambiguated; + } + + private static String cleanXML(final String str) { + return str.replaceAll("<.*?>", ""); + } + + /** + * Test XML patterns, as a help for people developing rules that are not + * programmers. 
+ * @throws SAXException + * @throws ParserConfigurationException + */ + public static void main(final String[] args) throws IOException, ParserConfigurationException, SAXException { + final DisambiguationRuleTest prt = new DisambiguationRuleTest(); + System.out.println("Running disambiguator rule tests..."); + prt.setUp(); + prt.testDisambiguationRulesFromXML(); + System.out.println("Tests successful."); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguatorTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguatorTest.java new file mode 100644 index 0000000..81439bc --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguatorTest.java @@ -0,0 +1,70 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tagging.disambiguation.rules.en; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator; +import de.danielnaber.languagetool.tagging.en.EnglishTagger; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class EnglishRuleDisambiguatorTest extends TestCase { + private EnglishTagger tagger; + private WordTokenizer tokenizer; + private SentenceTokenizer sentenceTokenizer; + private EnglishRuleDisambiguator disambiguator; + private DemoDisambiguator disamb2; + + public void setUp() { + tagger = new EnglishTagger(); + tokenizer = new WordTokenizer(); + sentenceTokenizer = new SentenceTokenizer(); + disambiguator = new EnglishRuleDisambiguator(); + disamb2 = new DemoDisambiguator(); + } + + public void testChunker() throws IOException { + TestTools.myAssert("I cannot have it.", + "/[null]SENT_START I/[I]PRP /[null]null cannot/[can]MD /[null]null have/[have]VB /[null]null it/[it]PRP ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("I cannot have it.", + "/[null]SENT_START I/[I]PRP /[null]null cannot/[can]MD /[null]null have/[have]NN|have/[have]VB|have/[have]VBP /[null]null it/[it]PRP ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools.myAssert("He is to blame.", + "/[null]SENT_START He/[he]PRP /[null]null is/[be]VBZ /[null]null to/[to]IN|to/[to]TO /[null]null blame/[blame]VB ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("He is to blame.", + "/[null]SENT_START He/[he]PRP 
/[null]null is/[be]VBZ /[null]null to/[to]IN|to/[to]TO /[null]null blame/[blame]JJ|blame/[blame]NN:UN|blame/[blame]VB|blame/[blame]VBP ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools.myAssert("He is well known.", + "/[null]SENT_START He/[he]PRP /[null]null is/[be]VBZ /[null]null well/[well]RB /[null]null known/[known]JJ ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("He is well known.", + "/[null]SENT_START He/[he]PRP /[null]null is/[be]VBZ /[null]null well/[well]NN|well/[well]RB|well/[well]UH|well/[well]VB|well/[well]VBP /[null]null known/[know]VBN|known/[known]NN ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + + } + +} + + diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java new file mode 100644 index 0000000..e64ff60 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguatorTest.java @@ -0,0 +1,81 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tagging.disambiguation.rules.fr; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator; +import de.danielnaber.languagetool.tagging.fr.FrenchTagger; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class FrenchRuleDisambiguatorTest extends TestCase { + private FrenchTagger tagger; + private WordTokenizer tokenizer; + private SentenceTokenizer sentenceTokenizer; + private FrenchRuleDisambiguator disambiguator; + private DemoDisambiguator disamb2; + + public void setUp() { + tagger = new FrenchTagger(); + tokenizer = new WordTokenizer(); + sentenceTokenizer = new SentenceTokenizer(); + disambiguator = new FrenchRuleDisambiguator(); + disamb2 = new DemoDisambiguator(); + } + + public void testChunker() throws IOException { + TestTools.myAssert("Je ne suis pas la seule.", + "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null ne/[null]A /[null]null suis/[être]V etre ind pres 1 s /[null]null pas/[pas]A /[null]null la/[le]D f s /[null]null seule/[seul]J f s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Je ne suis pas la seule.", + "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null ne/[null]null /[null]null suis/[suivre]V imp pres 2 s|suis/[suivre]V ind pres 1 s|suis/[suivre]V ind pres 2 s|suis/[être]V etre ind pres 1 s /[null]null pas/[pas]N f sp|pas/[pas]N m sp /[null]null la/[la]N m sp|la/[la]R pers obj 3 f s|la/[le]D f s /[null]null seule/[seul]D f s|seule/[seul]J f s|seule/[seul]N f s ./[null]null", + tokenizer, 
sentenceTokenizer, tagger, disamb2); + TestTools.myAssert("Il a enfin publié son livre.", + "/[null]SENT_START Il/[il]R pers suj 3 m s /[null]null a/[avoir]V avoir ind pres 3 s /[null]null enfin/[enfin]A /[null]null publié/[publier]V ppa m s /[null]null son/[son]D e s /[null]null livre/[livre]N e s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Il a enfin publié son livre.", + "/[null]SENT_START Il/[il]R pers suj 3 m s /[null]null a/[a]N m sp|a/[avoir]V avoir ind pres 3 s /[null]null enfin/[enfin]A /[null]null publié/[publier]V ppa m s|publié/[publié]J m s /[null]null son/[son]D m s|son/[son]N m s /[null]null livre/[livre]N e s|livre/[livrer]V imp pres 2 s|livre/[livrer]V ind pres 1 s|livre/[livrer]V ind pres 3 s|livre/[livrer]V sub pres 1 s|livre/[livrer]V sub pres 3 s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools.myAssert("Je danse toutes les semaines au club.", + "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null danse/[danser]V ind pres 1 s /[null]null toutes/[tous]R f p|toutes/[tout]D f p|toutes/[touter]V ind pres 2 s|toutes/[touter]V sub pres 2 s /[null]null les/[le]D e p /[null]null semaines/[semaine]N f p /[null]null au/[au]D m s /[null]null club/[club]N m s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Je danse toutes les semaines au club.", + "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null danse/[danse]N f s|danse/[danser]V imp pres 2 s|danse/[danser]V ind pres 1 s|danse/[danser]V ind pres 3 s|danse/[danser]V sub pres 1 s|danse/[danser]V sub pres 3 s /[null]null toutes/[tous]R f p|toutes/[tout]D f p|toutes/[touter]V ind pres 2 s|toutes/[touter]V sub pres 2 s /[null]null les/[le]D e p|les/[les]R pers obj 3 p /[null]null semaines/[semaine]N f p /[null]null au/[au]D m s /[null]null club/[club]N m s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools.myAssert("Quand j'étais petit, je jouais au football.", 
+ "/[null]SENT_START Quand/[quand]C sub /[null]null j/[je]R pers suj 1 s '/[null]null étais/[être]V etre ind impa 1 s /[null]null petit/[petit]J m s ,/[null]null /[null]null je/[je]R pers suj 1 s /[null]null jouais/[jouer]V ind impa 1 s /[null]null au/[au]D m s /[null]null football/[football]N m s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Quand j'étais petit, je jouais au football.", + "/[null]SENT_START Quand/[quand]C sub /[null]null j/[j]N m sp|j/[je]R pers suj 1 s '/[null]null étais/[étai]N m p|étais/[être]V etre ind impa 1 s|étais/[être]V etre ind impa 2 s /[null]null petit/[petit]J m s|petit/[petit]N m s ,/[null]null /[null]null je/[je]R pers suj 1 s /[null]null jouais/[jouer]V ind impa 1 s|jouais/[jouer]V ind impa 2 s /[null]null au/[au]D m s /[null]null football/[football]N m s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools.myAssert("Je suis petite.", + "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null suis/[être]V etre ind pres 1 s /[null]null petite/[petit]J f s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools.myAssert("Je suis petite.", + "/[null]SENT_START Je/[je]R pers suj 1 s /[null]null suis/[suivre]V imp pres 2 s|suis/[suivre]V ind pres 1 s|suis/[suivre]V ind pres 2 s|suis/[être]V etre ind pres 1 s /[null]null petite/[petit]J f s|petite/[petit]N f s ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + } + +} + + diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguatorTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguatorTest.java new file mode 100644 index 0000000..9e28e54 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguatorTest.java @@ -0,0 +1,89 @@ +package 
de.danielnaber.languagetool.tagging.disambiguation.rules.ro; + +import java.io.IOException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator; +import de.danielnaber.languagetool.tagging.ro.RomanianTagger; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.ro.RomanianWordTokenizer; + +public class RomanianRuleDisambiguatorTest extends TestCase { + + private RomanianTagger tagger; + private RomanianWordTokenizer tokenizer; + private SentenceTokenizer sentenceTokenizer; + private RomanianRuleDisambiguator disambiguator; + private DemoDisambiguator disamb2; + + public void setUp() { + tagger = new RomanianTagger(); + tokenizer = new RomanianWordTokenizer(); + sentenceTokenizer = new SentenceTokenizer(); + disambiguator = new RomanianRuleDisambiguator(); + disamb2 = new DemoDisambiguator(); + } + + public void testCare1() throws IOException { + TestTools + .myAssert( + "Persoana care face treabă.", + "/[null]SENT_START Persoana/[persoană]Sfs3aac000 /[null]null care/[car]Snp3anc000|care/[care]0000000000|care/[care]N000a0l000|care/[căra]V0p3000cz0|care/[căra]V0s3000cz0 /[null]null face/[face]V000000f00|face/[face]V0s3000iz0 /[null]null treabă/[treabă]Sfs3anc000 ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools + .myAssert( + "Persoana care face treabă.", + "/[null]SENT_START Persoana/[persoană]Sfs3aac000 /[null]null care/[care]N000a0l000 /[null]null face/[face]V000000f00|face/[face]V0s3000iz0 /[null]null treabă/[treabă]Sfs3anc000 ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + + } + + public void testEsteO() throws IOException { + TestTools + .myAssert( + "este o masă.", + "/[null]SENT_START este/[fi]V0s3000izb /[null]null o/[o]Dfs3a0t000|o/[o]I00000o000|o/[o]Nfs3a0p00c|o/[o]Sms3anc000|o/[vrea]V0s3000iov /[null]null 
masă/[masa]V0s3000is0|masă/[masă]Sfs3anc000 ./[null]null", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools + .myAssert( + "este o masă.", + "/[null]SENT_START este/[fi]V0s3000izb /[null]null o/[o]Dfs3a0t000|o/[o]I00000o000|o/[o]Nfs3a0p00c|o/[o]Sms3anc000|o/[vrea]V0s3000iov /[null]null masă/[masă]Sfs3anc000 ./[null]null", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools + .myAssert( + "este o masă", + "/[null]SENT_START este/[fi]V0s3000izb /[null]null o/[o]Dfs3a0t000|o/[o]I00000o000|o/[o]Nfs3a0p00c|o/[o]Sms3anc000|o/[vrea]V0s3000iov /[null]null masă/[masă]Sfs3anc000", + tokenizer, sentenceTokenizer, tagger, disambiguator); + + } + + public void testDezambiguizareVerb() throws IOException { + TestTools + .myAssert( + "vom participa la", + "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null participa/[participa]V000000f00|participa/[participa]V0s3000ii0 /[null]null la/[la]P000000000|la/[la]Sms3anc000", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools + .myAssert( + "vom participa la", + "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null participa/[participa]V000000f00 /[null]null la/[la]P000000000|la/[la]Sms3anc000", + tokenizer, sentenceTokenizer, tagger, disambiguator); + + TestTools + .myAssert( + "vom culege", + "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null culege/[culege]V000000f00|culege/[culege]V0s2000m00|culege/[culege]V0s3000iz0", + tokenizer, sentenceTokenizer, tagger, disamb2); + TestTools + .myAssert( + "vom culege", + "/[null]SENT_START vom/[vrea]V0p1000ivv /[null]null culege/[culege]V000000f00", + tokenizer, sentenceTokenizer, tagger, disambiguator); + TestTools + .myAssert( + "veți culege", + "/[null]SENT_START veți/[vrea]V0p2000ivv /[null]null culege/[culege]V000000f00", + tokenizer, sentenceTokenizer, tagger, disambiguator); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/en/EnglishTaggerTest.java 
b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/en/EnglishTaggerTest.java new file mode 100644 index 0000000..9828c1d --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/en/EnglishTaggerTest.java @@ -0,0 +1,90 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.en; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import junit.framework.TestCase; +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; +import de.danielnaber.languagetool.AnalyzedTokenReadings; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +/** + * @author Daniel Naber + */ +public class EnglishTaggerTest extends TestCase { + + private EnglishTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new EnglishTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); 
+ final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + assertFalse(wd.getTag() == null); + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("This is a big house.", + "This/[this]DT|This/[this]PDT -- is/[be]VBZ -- a/[a]DT -- big/[big]JJ|big/[big]RB -- house/[house]NN|house/[house]VB|house/[house]VBP", tokenizer, tagger); + TestTools.myAssert("Marketing do a lot of trouble.", + "Marketing/[market]VBG|Marketing/[marketing]NN:U -- do/[do]VB|do/[do]VBP -- a/[a]DT -- lot/[lot]NN -- of/[of]IN -- trouble/[trouble]NN:UN|trouble/[trouble]VB|trouble/[trouble]VBP", tokenizer, tagger); + TestTools.myAssert("Manager use his laptop every day.", + "Manager/[manager]NN -- use/[use]NN:UN|use/[use]VB|use/[use]VBP -- his/[hi]NNS|his/[his]PRP$ -- laptop/[laptop]NN -- every/[every]DT -- day/[day]NN:UN", tokenizer, tagger); + TestTools.myAssert("This is a bigger house.", + "This/[this]DT|This/[this]PDT -- is/[be]VBZ -- a/[a]DT -- bigger/[big]JJR -- house/[house]NN|house/[house]VB|house/[house]VBP", tokenizer, tagger); + TestTools.myAssert("He doesn't believe me.", + "He/[he]PRP -- doesn/[do]VBZ -- t/[null]null -- believe/[believe]VB|believe/[believe]VBP -- me/[I]PRP", tokenizer, tagger); + TestTools.myAssert("It has become difficult.", + "It/[it]PRP -- has/[have]VBZ -- become/[become]VB|become/[become]VBN|become/[become]VBP -- difficult/[difficult]JJ", tokenizer, tagger); + } + + public void testLemma() throws IOException { + EnglishTagger tagger = new EnglishTagger(); + List<String> words = new ArrayList<String>(); + words.add("Oliver"); + words.add("works"); + List<AnalyzedTokenReadings> aToken = tagger.tag(words); + + assertEquals(2, aToken.size()); + assertEquals(3, aToken.get(0).getReadings().size()); + assertEquals(2, aToken.get(1).getReadings().size()); + + assertEquals("Oliver", aToken.get(0).getReadings().get(0).getLemma()); + // TODO: are the following two correct? 
+ assertEquals("oliver", aToken.get(0).getReadings().get(1).getLemma()); + assertEquals("olive", aToken.get(0).getReadings().get(2).getLemma()); + + assertEquals("work", aToken.get(1).getReadings().get(0).getLemma()); + assertEquals("work", aToken.get(1).getReadings().get(1).getLemma()); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/eo/EsperantoTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/eo/EsperantoTaggerTest.java new file mode 100644 index 0000000..b52ecc9 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/eo/EsperantoTaggerTest.java @@ -0,0 +1,45 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.eo; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class EsperantoTaggerTest extends TestCase { + + private EsperantoTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new EsperantoTagger(); + tokenizer = new WordTokenizer(); + } + + public void testTagger() throws IOException { + TestTools.myAssert("Tio estas simpla testo", + "Tio/[null]T nak np t o -- estas/[esti]V nt as -- simpla/[simpla]A nak np -- testo/[testo]O nak np", tokenizer, tagger); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/es/SpanishTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/es/SpanishTaggerTest.java new file mode 100644 index 0000000..fd373a7 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/es/SpanishTaggerTest.java @@ -0,0 +1,59 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.es; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class SpanishTaggerTest extends TestCase { + + private SpanishTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new SpanishTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("Soy un hombre muy honrado.", + "Soy/[ser]VSIP1S0 -- un/[uno]DI0MS0 -- hombre/[hombre]I|hombre/[hombre]NCMS000 -- muy/[muy]RG -- honrado/[honrar]VMP00SM", tokenizer, tagger); + TestTools.myAssert("Tengo que ir a mi casa.", + "Tengo/[tener]VMIP1S0 -- que/[que]CS|que/[que]PR0CN000 -- ir/[ir]VMN0000 -- a/[a]NCFS000|a/[a]SPS00 -- mi/[mi]DP1CSS|mi/[mi]NCMS000 -- casa/[casa]NCFS000|casa/[casar]VMIP3S0|casa/[casar]VMM02S0", tokenizer, tagger); + TestTools.myAssert("blablabla","blablabla/[null]null", tokenizer, tagger); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/fr/FrenchTaggerTest.java 
b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/fr/FrenchTaggerTest.java new file mode 100644 index 0000000..f453891 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/fr/FrenchTaggerTest.java @@ -0,0 +1,62 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.fr; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class FrenchTaggerTest extends TestCase { + + private FrenchTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new FrenchTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { 
+ System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("C'est la vie.", + "C/[C]N m sp|C/[c]N m sp|C/[c]R dem e s -- est/[est]N m s|est/[être]V etre ind pres 3 s -- la/[la]N m sp|la/[la]R pers obj 3 f s|la/[le]D f s -- vie/[vie]N f s", tokenizer, tagger); + TestTools.myAssert("Je ne parle pas français.", + "Je/[je]R pers suj 1 s -- ne/[null]null -- parle/[parler]V imp pres 2 s|parle/[parler]V ind pres 1 s|parle/[parler]V ind pres 3 s|parle/[parler]V sub pres 1 s|parle/[parler]V sub pres 3 s -- pas/[pas]N f sp|pas/[pas]N m sp -- français/[français]J m sp|français/[français]N m sp", tokenizer, tagger); + TestTools.myAssert("blablabla","blablabla/[blablabla]N m s", tokenizer, tagger); + TestTools.myAssert("passagère","passagère/[passager]J f s|passagère/[passager]N f s", tokenizer, tagger); + TestTools.myAssert("non_existing_word","non_existing_word/[null]null", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/gl/GalicianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/gl/GalicianTaggerTest.java new file mode 100644 index 0000000..563d7ce --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/gl/GalicianTaggerTest.java @@ -0,0 +1,60 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.gl; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +/** + * @author Susana Sotelo Docio + * based on English test + */ +public class GalicianTaggerTest extends TestCase { + + private GalicianTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new GalicianTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("Todo vai mudar", + "Todo/[todo]DI0MS0|Todo/[todo]PI0MS000 -- vai/[ir]VMIP3S0|vai/[ir]VMM02S0 -- mudar/[mudar]VMN0000|mudar/[mudar]VMN01S0|mudar/[mudar]VMN03S0|mudar/[mudar]VMSF1S0|mudar/[mudar]VMSF3S0", tokenizer, tagger); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/it/ItalianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/it/ItalianTaggerTest.java new file mode 100644 index 0000000..eeb6ffd --- /dev/null +++ 
b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/it/ItalianTaggerTest.java @@ -0,0 +1,60 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.it; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class ItalianTaggerTest extends TestCase { + + private ItalianTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new ItalianTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + 
public void testTagger() throws IOException { + TestTools.myAssert("Non c'è linguaggio senza inganno.", + "Non/[non]ADV -- c/[C]NPR -- è/[essere]AUX:ind+pres+3+s|è/[essere]VER:ind+pres+3+s -- linguaggio/[linguaggio]NOUN-M:s -- senza/[senza]CON|senza/[senza]PRE -- inganno/[ingannare]VER:ind+pres+1+s|inganno/[inganno]NOUN-M:s", tokenizer, tagger); + TestTools.myAssert("Amo quelli che desiderano l'impossibile.", + "Amo/[amare]VER:ind+pres+1+s -- quelli/[quelli]PRO-DEMO-M-P|quelli/[quello]DET-DEMO:m+p -- che/[che]CON|che/[che]DET-WH:f+p|che/[che]DET-WH:f+s|che/[che]DET-WH:m+p|che/[che]DET-WH:m+s|che/[che]WH-CHE -- desiderano/[desiderare]VER:ind+pres+3+p -- l/[null]null -- impossibile/[impossibile]ADJ:pos+f+s|impossibile/[impossibile]ADJ:pos+m+s", tokenizer, tagger); + TestTools.myAssert("blablabla", "blablabla/[null]null", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/nl/DutchTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/nl/DutchTaggerTest.java new file mode 100644 index 0000000..c6c4322 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/nl/DutchTaggerTest.java @@ -0,0 +1,58 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.nl; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class DutchTaggerTest extends TestCase { + + private DutchTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new DutchTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("Dit is een Nederlandse zin om het programma'tje te testen.", + "Dit/[dit]DTh -- is/[zijn]VB3 -- een/[een]DTe|een/[een]NM|een/[een]NM1|een/[een]NN1d -- Nederlandse/[Nederlandse]NN1 -- zin/[zin]NN1d|zin/[zinnen]VB1 -- om/[om]PRom -- het/[het]DTh -- programma/[programma]NN1d|programma/[programma]NN1h -- tje/[null]null -- te/[te]PRte -- testen/[test]NN2|testen/[testen]VBi", tokenizer, tagger); + TestTools.myAssert("zwijnden","zwijnden/[zwijnen]VBh", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/pl/PolishTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/pl/PolishTaggerTest.java new file mode 100644 index 
0000000..d9ced96 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/pl/PolishTaggerTest.java @@ -0,0 +1,60 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.pl; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class PolishTaggerTest extends TestCase { + + private PolishTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new PolishTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the 
dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("To jest duży dom.", + "To/[ten]adj:sg:acc.nom.voc:n:pos|To/[to]conj -- jest/[być]verb:fin:sg:ter:imperf -- duży/[duży]adj:sg:acc:m3:pos:aff|duży/[duży]adj:sg:nom:m:pos:aff|duży/[duży]adj:sg:voc:m1.m2:pos:aff -- dom/[dom]subst:sg:acc.nom:m3", tokenizer, tagger); + TestTools.myAssert("Krowa pasie się na pastwisku.", + "Krowa/[krowa]subst:sg:nom:f -- pasie/[pas]subst:sg:loc.voc:m3 -- się/[siebie]qub -- na/[na]prep:acc.loc -- pastwisku/[pastwisko]subst:sg:dat.loc:n", tokenizer, tagger); + TestTools.myAssert("blablabla", "blablabla/[null]null", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerDiacriticsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerDiacriticsTest.java new file mode 100644 index 0000000..5d968af --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerDiacriticsTest.java @@ -0,0 +1,97 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.ro; + +/** + * + * These tests are kept to make sure UTF-8 dictionaries are correctly read.<br/> + * Prior to morfologik 1.1.4 some words containing diacritics were not correctly + * returned. + * + * @author Ionuț Păduraru + * @since 08.03.2009 19:25:50 + */ +public class RomanianTaggerDiacriticsTest extends RomanianTaggerTestAbs { + + /** + * "test_diacritics.dict" was built from a simple input file : + * <p> + * cușcă cușcă 001 + * </p> + * <p> + * cartea carte 000 + * </p> + * <p> + * mergeam merge 001 + * </p> + * <p> + * merseserăm merge 002 + * </p> + * <p> + * cuțit cuțit 001 + * </p> + * <p> + * cuțitul cuțit 002 + * </p> + * + * @author Ionuț Păduraru + * @since 08.03.2009 19:15:59 + * @throws Exception + */ + @Override + protected RomanianTagger createTagger() { + RomanianTagger res = new RomanianTagger( + "/ro/test_diacritics.dict"); + return res; + } + + /** + * Prior to morfologik 1.1.4: For "merseserăm" the lemma is incorrect: "mege" + * instead of "merge". If the dictionary is used from + * command-line(/fsa_morph -d ...), the correct lemma is returned. 
+ * + * @author Ionuț Păduraru + * @since 08.03.2009 19:25:59 + * @throws Exception + */ + public void testTaggerMerseseram() throws Exception { + // these tests are using "test_diacritics.dict" + assertHasLemmaAndPos("făcusem", "face", "004"); + assertHasLemmaAndPos("cuțitul", "cuțit", "002"); + // make sure lemma is correct (POS is hard-coded, not important) + assertHasLemmaAndPos("merseserăm", "merge", "002"); + } + + /** + * + * @author Ionuț Păduraru + * @since 24.03.2009 21:39:25 + * @throws Exception + */ + public void testTaggerCuscaCutit() throws Exception { + // these tests are using "test_diacritics.dict" + // all these are correct, they are here just to prove that "some" words + // are correctly returned + + assertHasLemmaAndPos("cușcă", "cușcă", "001"); + assertHasLemmaAndPos("cuțit", "cuțit", "001"); + assertHasLemmaAndPos("cuțitul", "cuțit", "002"); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTest.java new file mode 100644 index 0000000..05f37cc --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTest.java @@ -0,0 +1,105 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.ro; + +import java.io.IOException; + +import de.danielnaber.languagetool.TestTools; + +/** + * + * @author Ionuț Păduraru + * @since 20.02.2009 19:36:32 + */ +public class RomanianTaggerTest extends RomanianTaggerTestAbs { + + /** + * First we test if the tagger works fine with single words + * + * @author Ionuț Păduraru + * @since 20.02.2009 19:50:10 + * @throws Exception + */ + public void testTagger_Merge() throws Exception { + // merge - verb indicativ imperfect, persoana întâi, singular + assertHasLemmaAndPos("mergeam", "merge", "V0s1000ii0"); + // merge - verb indicativ imperfect, persoana întâi, plural + assertHasLemmaAndPos("mergeam", "merge", "V0p1000ii0"); + // merge - verb indicativ imperfect, persoana întâi, plural + } + + /** + * <code>merseserăm</code> had some problems (incorrect lemma - mege - + * missing "r") + * + * @author Ionuț Păduraru + * @since 20.02.2009 20:24:55 + * @throws Exception + */ + public void testTagger_Merseseram() throws Exception { + // first make sure lemma is correct (ignore POS) + assertHasLemmaAndPos("merseserăm", "merge", null); + // now that lemma is correct, also check POS + assertHasLemmaAndPos("merseserăm", "merge", "V0p1000im0"); + } + + /** + * A special word: a fi (to be) - eu sunt (i am) + ei sunt (they are) + * + * @author Ionuț Păduraru + * @since 20.02.2009 20:21:10 + * @throws Exception + */ + public void testTagger_Fi() throws Exception { + // fi - verb indicativ prezent, persoana întâi, singular + assertHasLemmaAndPos("sunt", "fi", "V0s1000izf"); + // fi verb indicativ prezent, persoana a treia, plural + assertHasLemmaAndPos("sunt", "fi", "V0p3000izf"); + } + + /** + * the big picture: test if tagger performs well with a sentence + * + * 
@author ionuț păduraru + * @since 20.02.2009 01:12:33 + * @throws IOException + */ + public void testTagger() throws IOException { + TestTools + .myAssert( + "Cartea este frumoasă.", + "Cartea/[carte]Sfs3aac000 -- este/[fi]V0s3000izb -- frumoasă/[frumos]Afs3an0000", + getTokenizer(), getTagger()); + } + + /** + * + * @author ionuț păduraru + * @since 20.02.2009 01:44:50 + * @throws IOException + */ + public void testTaggerMerseseram() throws IOException { + TestTools.myAssert("merseserăm", "merseserăm/[merge]V0p1000im0", + getTokenizer(), getTagger()); + } + + public static void main(String[] args) { + junit.textui.TestRunner.run(RomanianTaggerTest.class); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTestAbs.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTestAbs.java new file mode 100644 index 0000000..ca5bdf9 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ro/RomanianTaggerTestAbs.java @@ -0,0 +1,147 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.ro; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.AnalyzedToken; +import de.danielnaber.languagetool.AnalyzedTokenReadings; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +/** + * <p> + * Root class for RomanianTagger tests + * </p> + * <p> + * Provides convenient methods to find specific lemma/pos + * </p> + * + * + * @author Ionuț Păduraru + * @since 20.02.2009 19:36:32 + * + */ +public abstract class RomanianTaggerTestAbs extends TestCase { + + private RomanianTagger tagger; + private WordTokenizer tokenizer; + + /* + * (non-Javadoc) + * + * @see junit.framework.TestCase#setUp() + */ + public void setUp() { + tagger = createTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + /** + * + * @author Ionuț Păduraru + * @since 08.03.2009 22:09:01 + * @return + */ + protected RomanianTagger createTagger() { + // override this if you need another dictionary (a dictionary + // based on another file) + return new RomanianTagger(); + } + + /** + * Verify if <code>inflected</code> contains the specified lemma and 
pos + * + * @author Ionuț Păduraru + * @since 20.02.2009 19:17:54 + * @param inflected + * - input word, inflected form + * @param lemma + * expected lemma + * @param posTag + * expected tag for lemma + * @throws IOException + */ + protected void assertHasLemmaAndPos(String inflected, String lemma, + String posTag) throws IOException { + List<AnalyzedTokenReadings> tags = tagger.tag(createList(inflected)); + StringBuilder allTags = new StringBuilder(); + boolean found = false; + for (AnalyzedTokenReadings analyzedTokenReadings : tags) { + int length = analyzedTokenReadings.getReadingsLength(); + for (int i = 0; i < length; i++) { + AnalyzedToken token = analyzedTokenReadings.getAnalyzedToken(i); + String crtLemma = token.getLemma(); + String crtPOSTag = token.getPOSTag(); + allTags.append(String.format("[%s/%s]", crtLemma, crtPOSTag)); + found = ((null == lemma) || (lemma.equals(crtLemma))) + && ((null == posTag) || (posTag.equals(crtPOSTag))); + if (found) + break; + } // for i + if (found) + break; + } // foreach tag + assertTrue(String.format("Lemma and POS not found for word [%s]! " + + "Expected [%s/%s]. Actual: %s", inflected, lemma, posTag, + allTags.toString()), found); + } + + /** + * Create a List containing some words + * + * @author Ionuț Păduraru + * @since 20.02.2009 19:13:57 + * @param words + * @return + */ + private List<String> createList(String... 
words) { + List<String> res = new ArrayList<String>(); + for (String s : words) { + res.add(s); + } + return res; + } + + public RomanianTagger getTagger() { + return tagger; + } + + public WordTokenizer getTokenizer() { + return tokenizer; + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ru/RussianTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ru/RussianTaggerTest.java new file mode 100644 index 0000000..3a555d1 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/ru/RussianTaggerTest.java @@ -0,0 +1,59 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.ru; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class RussianTaggerTest extends TestCase { + + private RussianTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new RussianTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему.", + "Все/[весь]PADJ:PL:Nom|Все/[весь]PADJ:PL:V|Все/[все]ADV|Все/[все]PNN:PL:Nom|Все/[все]PNN:PL:V|Все/[все]PNN:Sin:Nom|Все/[все]PNN:Sin:V -- счастливые/[счастливый]ADJ:PL:Nom|счастливые/[счастливый]ADJ:PL:V -- семьи/[семья]NN:Fem:PL:Nom|семьи/[семья]NN:Fem:PL:V|семьи/[семья]NN:Fem:Sin:R -- похожи/[похожий]ADJ_Short:PL -- друг/[друг]NN:Masc:Sin:Nom -- на/[на]PREP -- друга/[друг]NN:Masc:Sin:R|друга/[друг]NN:Masc:Sin:V -- каждая/[каждый]PADJ:Fem:Nom -- несчастливая/[несчастливый]ADJ:Fem:Nom -- семья/[семья]NN:Fem:Sin:Nom -- несчастлива/[несчастливый]ADJ_Short:Fem -- 
по-своему/[по-своему]ADV", tokenizer, tagger); + TestTools.myAssert("Все смешалось в доме Облонских.", + "Все/[весь]PADJ:PL:Nom|Все/[весь]PADJ:PL:V|Все/[все]ADV|Все/[все]PNN:PL:Nom|Все/[все]PNN:PL:V|Все/[все]PNN:Sin:Nom|Все/[все]PNN:Sin:V -- смешалось/[смешаться]VB:Past:Neut -- в/[в]PREP -- доме/[дом]NN:Masc:Sin:P -- Облонских/[null]null", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sk/SlovakTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sk/SlovakTaggerTest.java new file mode 100644 index 0000000..5949afc --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sk/SlovakTaggerTest.java @@ -0,0 +1,58 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.sk; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class SlovakTaggerTest extends TestCase { + + private SlovakTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new SlovakTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() +" lacks a POS tag in the dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("Tu nájdete vybrané čísla a obsahy časopisu Kultúra slova.", + "Tu/[tu]J|Tu/[tu]PD|Tu/[tu]T -- nájdete/[nájsť]VKdpb+ -- vybrané/[vybraný]Gtfp1x|vybrané/[vybraný]Gtfp4x|vybrané/[vybraný]Gtfp5x|vybrané/[vybraný]Gtip1x|vybrané/[vybraný]Gtip4x|vybrané/[vybraný]Gtip5x|vybrané/[vybraný]Gtnp1x|vybrané/[vybraný]Gtnp4x|vybrané/[vybraný]Gtnp5x|vybrané/[vybraný]Gtns1x|vybrané/[vybraný]Gtns4x|vybrané/[vybraný]Gtns5x -- čísla/[číslo]SSnp1|čísla/[číslo]SSnp4|čísla/[číslo]SSnp5|čísla/[číslo]SSns2 -- a/[a]J|a/[a]O|a/[a]Q|a/[a]SUnp1|a/[a]SUnp2|a/[a]SUnp3|a/[a]SUnp4|a/[a]SUnp5|a/[a]SUnp6|a/[a]SUnp7|a/[a]SUns1|a/[a]SUns2|a/[a]SUns3|a/[a]SUns4|a/[a]SUns5|a/[a]SUns6|a/[a]SUns7|a/[a]T|a/[a]W|a/[as]W 
-- obsahy/[obsah]SSip1|obsahy/[obsah]SSip4|obsahy/[obsah]SSip5 -- časopisu/[časopis]SSis2|časopisu/[časopis]SSis3 -- Kultúra/[kultúra]SSfs1|Kultúra/[kultúra]SSfs5 -- slova/[slovo]SSns2", tokenizer, tagger); + TestTools.myAssert("blabla","blabla/[null]null", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sv/SwedishTaggerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sv/SwedishTaggerTest.java new file mode 100644 index 0000000..128d46a --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tagging/sv/SwedishTaggerTest.java @@ -0,0 +1,60 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tagging.sv; + +import java.io.IOException; + +import morfologik.stemming.Dictionary; +import morfologik.stemming.DictionaryLookup; +import morfologik.stemming.WordData; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; + +public class SwedishTaggerTest extends TestCase { + + private SwedishTagger tagger; + private WordTokenizer tokenizer; + + public void setUp() { + tagger = new SwedishTagger(); + tokenizer = new WordTokenizer(); + } + + public void testDictionary() throws IOException { + final Dictionary dictionary = Dictionary.read( + this.getClass().getResource(tagger.getFileName())); + final DictionaryLookup dl = new DictionaryLookup(dictionary); + for (WordData wd : dl) { + if (wd.getTag() == null || wd.getTag().length() == 0) { + System.err.println("**** Warning: the word " + wd.getWord() + "/" + wd.getStem() + + " lacks a POS tag in the dictionary."); + } + } + } + + public void testTagger() throws IOException { + TestTools.myAssert("Det är nog bäst att du får en klubba till", + "Det/[det]PN -- är/[vara]VB:PRS -- nog/[nog]AB -- bäst/[bra]JJ:S|bäst/[bäst]AB|bäst/[god]JJ:S -- att/[att]KN -- du/[du]PN -- får/[få]VB:PRS|får/[får]NN:OF:PLU:NOM:NEU|får/[får]NN:OF:SIN:NOM:NEU -- en/[en]NN:OF:SIN:NOM:UTR|en/[en]PN|en/[passant]en passant NN:OF:SIN:NOM:UTR|en/[passanten]en passant NN:BF:SIN:NOM:UTR|en/[passantens]en passant NN:BF:SIN:GEN:UTR|en/[passanter]en passant NN:OF:PLU:NOM:UTR|en/[passanterna]en passant NN:BF:PLU:NOM:UTR|en/[passanternas]en passant NN:BF:PLU:GEN:UTR|en/[passanters]en passant NN:OF:PLU:GEN:UTR|en/[passants]en passant NN:OF:SIN:GEN:UTR -- 
klubba/[klubba]NN:OF:SIN:NOM:UTR|klubba/[klubba]VB:IMP|klubba/[klubba]VB:INF -- till/[till]AB|till/[till]PP", tokenizer, tagger); + TestTools.myAssert("Du menar sannolikt \"massera\" om du inte skriver om masarnas era förstås.", + "Du/[du]PN -- menar/[mena]VB:PRS -- sannolikt/[sannolik]JJ:PN|sannolikt/[sannolikt]AB -- massera/[massera]VB:IMP|massera/[massera]VB:INF -- om/[om]AB|om/[om]KN|om/[om]PP -- du/[du]PN -- inte/[inte]AB -- skriver/[skriva]VB:PRS -- om/[om]AB|om/[om]KN|om/[om]PP -- masarnas/[mas]NN:BF:PLU:GEN:UTR -- era/[era]NN:OF:SIN:NOM:UTR|era/[era]PN -- förstås/[förstå]VB:INF:PF|förstås/[förstå]VB:PRS:PF|förstås/[förstås]AB", tokenizer, tagger); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/test-en.txt b/JLanguageTool/src/test/de/danielnaber/languagetool/test-en.txt new file mode 100644 index 0000000..5e7c8fb --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/test-en.txt @@ -0,0 +1 @@ +This is an test.
\ No newline at end of file diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/RussianSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/RussianSRXSentenceTokenizerTest.java new file mode 100644 index 0000000..6acf29c --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/RussianSRXSentenceTokenizerTest.java @@ -0,0 +1,120 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tokenizers; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; + +/* + * Russian SRX Sentence Tokenizer Test + * $Id$ + */ + + +public class RussianSRXSentenceTokenizerTest extends TestCase { + + // accept \n as paragraph: + private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("ru"); + // accept only \n\n as paragraph: + private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("ru"); + + + public final void setUp() { + stokenizer.setSingleLineBreaksMarksParagraph(true); + stokenizer2.setSingleLineBreaksMarksParagraph(false); + } + + public final void testTokenize() { + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + testSplit(new String[] { "Dies ist ein Satz." }); + testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." }); + testSplit(new String[] { "Ein Satz! ", "Noch einer." }); + testSplit(new String[] { "Ein Satz... ", "Noch einer." }); + testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." }); + + testSplit(new String[] { "Heute ist der 13.12.2004." }); + testSplit(new String[] { "Es geht am 24.09. los." }); + testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." }); + + testSplit(new String[] { "Das ist,, also ob es bla." }); + testSplit(new String[] { "Das ist es.. ", "So geht es weiter." }); + + testSplit(new String[] { "Das hier ist ein(!) Satz." }); + testSplit(new String[] { "Das hier ist ein(!!) Satz." 
}); + testSplit(new String[] { "Das hier ist ein(?) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + + // TODO: derzeit unterscheiden wir nicht, ob nach dem Doppelpunkt ein + // ganzer Satz kommt oder nicht: + testSplit(new String[] { "Das war es: gar nichts." }); + testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." }); + + // incomplete sentences, need to work for on-the-fly checking of texts: + testSplit(new String[] { "Here's a" }); + testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" }); + + // Tests taken from LanguageTool's SentenceSplitterTest.py: + testSplit(new String[] { "This is a sentence. " }); + testSplit(new String[] { "This is a sentence. ", "And this is another one." }); + testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." }); + testSplit(new String[] { "Don't split strings like U.S.A. either." }); + testSplit(new String[] { "Don't split strings like U. S. A. either." }); + testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." }); + testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." }); + testSplit(new String[] { "The \".\" should not be a delimiter in quotes." }); + testSplit(new String[] { "\"Here he comes!\" she said." }); + testSplit(new String[] { "\"Here he comes!\", she said." }); + testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." }); + testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." }); + testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" }); + // known to fail: + // testSplit(new String[]{"He won't. ", "Really."}); + testSplit(new String[] { "He won't go. ", "Really." }); + testSplit(new String[] { "He won't say no.", "Not really." }); + testSplit(new String[] { "He won't say No.", "Not really." }); + testSplit(new String[] { "This is it: a test." 
}); + // one/two returns = paragraph = new sentence: + TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); + TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer); + TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); + TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2); + // Missing space after sentence end: + testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." }); + // From the Russian abbreviation list: + testSplit(new String[] { "Отток капитала из России составил 7 млрд. долларов, сообщил министр финансов Алексей Кудрин." }); + testSplit(new String[] { "Журнал издаётся с 1967 г., пользуется большой популярностью в мире." }); + testSplit(new String[] { "С 2007 г. периодичность выхода газеты – 120 раз в год." }); + testSplit(new String[] { "Редакция журнала находится в здании по адресу: г. Москва, 110000, улица Мира, д. 1." }); + testSplit(new String[] { "Все эти вопросы заставляют нас искать ответы в нашей истории 60-80-х гг. прошлого столетия." }); + testSplit(new String[] { "Более 300 тыс. документов и справочников." }); + testSplit(new String[] { "Скидки до 50000 руб. на автомобили." }); + testSplit(new String[] { "Изготовление визиток любыми тиражами (от 20 шт. до 10 тысяч) в минимальные сроки (от 20 минут)." 
}); + } + + public final void testSplit(final String[] sentences) { + TestTools.testSplit(sentences, stokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizerTest.java new file mode 100644 index 0000000..7f3b76c --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizerTest.java @@ -0,0 +1,108 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tokenizers; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; + +/** + * @author Daniel Naber + */ +public class SRXSentenceTokenizerTest extends TestCase { + + // accept \n as paragraph: + private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("en"); + // accept only \n\n as paragraph: + private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("en"); + + public void setUp() { + stokenizer.setSingleLineBreaksMarksParagraph(true); + stokenizer2.setSingleLineBreaksMarksParagraph(false); + } + + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + public void testTokenize() { + // incomplete sentences, need to work for on-the-fly checking of texts: + testSplit(new String[] { "Here's a" }); + testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" }); + + testSplit(new String[] { "This is a sentence. " }); + testSplit(new String[] { "This is a sentence. ", "And this is another one." }); + testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." }); + testSplit(new String[] { "This is e.g. Mr. Smith, who talks slowly...", + "But this is another sentence." }); + testSplit(new String[] { "Chanel no. 5 is blah." }); + testSplit(new String[] { "Mrs. Jones gave Peter $4.5, to buy Chanel No 5.", + "He never came back." }); + testSplit(new String[] { "On p. 6 there's nothing. ", "Another sentence." }); + testSplit(new String[] { "Leave me alone!, he yelled. ", "Another sentence." }); + testSplit(new String[] { "\"Leave me alone!\", he yelled." }); + testSplit(new String[] { "'Leave me alone!', he yelled. ", "Another sentence." 
}); + testSplit(new String[] { "'Leave me alone!,' he yelled. ", "Another sentence." }); + testSplit(new String[] { "This works on the phrase level, i.e. not on the word level." }); + testSplit(new String[] { "Let's meet at 5 p.m. in the main street." }); + testSplit(new String[] { "James comes from the U.K. where he worked as a programmer." }); + testSplit(new String[] { "Don't split strings like U.S.A. please." }); + testSplit(new String[] { "Don't split strings like U. S. A. either." }); + testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." }); + testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." }); + testSplit(new String[] { "The \".\" should not be a delimiter in quotes." }); + testSplit(new String[] { "\"Here he comes!\" she said." }); + testSplit(new String[] { "\"Here he comes!\", she said." }); + testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." }); + testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." }); + testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" }); + testSplit(new String[] { "The sentence (...) ends here." }); + testSplit(new String[] { "The sentence [...] ends here." }); + testSplit(new String[] { "The sentence ends here (...). ", "Another sentence." }); + // previously known failed but not now :) + testSplit(new String[]{"He won't. ", "Really."}); + testSplit(new String[]{"He will not. ", "Really."}); + testSplit(new String[] { "He won't go. ", "Really." }); + testSplit(new String[] { "He won't say no.", "Not really." }); + testSplit(new String[] { "He won't say No.", "Not really." }); + testSplit(new String[] { "He won't say no. 5 is better. ", "Not really." }); + testSplit(new String[] { "He won't say No. 5 is better. ", "Not really." }); + testSplit(new String[] { "They met at 5 p.m. on Thursday." }); + testSplit(new String[] { "They met at 5 p.m. ", "It was Thursday." 
}); + testSplit(new String[] { "This is it: a test." }); + // one/two returns = paragraph = new sentence: + TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); + TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer); + TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); + TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2); + // Missing space after sentence end: + testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." }); + // From the abbreviation list: + testSplit(new String[] { "Jones Bros. have built a succesful company." }); + // parentheses: + testSplit(new String[] { "It (really!) works." }); + testSplit(new String[] { "It [really!] works." }); + testSplit(new String[] { "It works (really!). ", "No doubt." }); + testSplit(new String[] { "It works [really!]. ", "No doubt." }); + testSplit(new String[] { "It really(!) works well." }); + testSplit(new String[] { "It really[!] works well." }); + } + + private void testSplit(String[] sentences) { + TestTools.testSplit(sentences, stokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SentenceTokenizerTest.java new file mode 100644 index 0000000..9d15429 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/SentenceTokenizerTest.java @@ -0,0 +1,107 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tokenizers; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; + +/** + * @author Daniel Naber + */ +public class SentenceTokenizerTest extends TestCase { + + // accept \n as paragraph: + private SentenceTokenizer stokenizer = new SentenceTokenizer(); + // accept only \n\n as paragraph: + private SentenceTokenizer stokenizer2 = new SentenceTokenizer(); + + public void setUp() { + stokenizer.setSingleLineBreaksMarksParagraph(true); + stokenizer2.setSingleLineBreaksMarksParagraph(false); + } + + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + public void testTokenize() { + // incomplete sentences, need to work for on-the-fly checking of texts: + testSplit(new String[] { "Here's a" }); + testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" }); + + testSplit(new String[] { "This is a sentence. " }); + testSplit(new String[] { "This is a sentence. ", "And this is another one." }); + testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." }); + testSplit(new String[] { "This is e.g. Mr. Smith, who talks slowly...", + "But this is another sentence." }); + testSplit(new String[] { "Chanel no. 5 is blah." }); + testSplit(new String[] { "Mrs. Jones gave Peter $4.5, to buy Chanel No 5.", + "He never came back." }); + testSplit(new String[] { "On p. 6 there's nothing. ", "Another sentence." 
}); + testSplit(new String[] { "Leave me alone!, he yelled. ", "Another sentence." }); + testSplit(new String[] { "\"Leave me alone!\", he yelled." }); + testSplit(new String[] { "'Leave me alone!', he yelled. ", "Another sentence." }); + testSplit(new String[] { "'Leave me alone!,' he yelled. ", "Another sentence." }); + testSplit(new String[] { "This works on the phrase level, i.e. not on the word level." }); + testSplit(new String[] { "Let's meet at 5 p.m. in the main street." }); + testSplit(new String[] { "James comes from the U.K. where he worked as a programmer." }); + testSplit(new String[] { "Don't split strings like U.S.A. please." }); + testSplit(new String[] { "Don't split strings like U. S. A. either." }); + testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." }); + testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." }); + testSplit(new String[] { "The \".\" should not be a delimiter in quotes." }); + testSplit(new String[] { "\"Here he comes!\" she said." }); + testSplit(new String[] { "\"Here he comes!\", she said." }); + testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." }); + testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." }); + testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" }); + testSplit(new String[] { "The sentence (...) ends here." }); + testSplit(new String[] { "The sentence [...] ends here." }); + testSplit(new String[] { "The sentence ends here (...). ", "Another sentence." }); + // TODO: known to fail: + // testSplit(new String[]{"He won't. ", "Really."}); + testSplit(new String[]{"He will not. ", "Really."}); + testSplit(new String[] { "He won't go. ", "Really." }); + testSplit(new String[] { "He won't say no.", "Not really." }); + testSplit(new String[] { "He won't say No.", "Not really." }); + testSplit(new String[] { "He won't say no. 5 is better. ", "Not really." 
}); + testSplit(new String[] { "He won't say No. 5 is better. ", "Not really." }); + testSplit(new String[] { "They met at 5 p.m. on Thursday." }); + testSplit(new String[] { "They met at 5 p.m. ", "It was Thursday." }); + testSplit(new String[] { "This is it: a test." }); + // one/two returns = paragraph = new sentence: + TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer); + TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2); + TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); + // Missing space after sentence end: + testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." }); + // From the abbreviation list: + testSplit(new String[] { "Jones Bros. have built a succesful company." }); + // parentheses: + testSplit(new String[] { "It (really!) works." }); + testSplit(new String[] { "It [really!] works." }); + testSplit(new String[] { "It works (really!). ", "No doubt." }); + testSplit(new String[] { "It works [really!]. ", "No doubt." }); + testSplit(new String[] { "It really(!) works well." }); + testSplit(new String[] { "It really[!] works well." 
}); + } + + private void testSplit(String[] sentences) { + TestTools.testSplit(sentences, stokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/WordTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/WordTokenizerTest.java new file mode 100644 index 0000000..675dfb0 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/WordTokenizerTest.java @@ -0,0 +1,38 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tokenizers; + +import junit.framework.TestCase; + +import java.util.List; + +public class WordTokenizerTest extends TestCase { + + public void testTokenize() { + WordTokenizer w = new WordTokenizer(); + List <String> testList = w.tokenize("This is\u00A0a test"); + assertEquals(testList.size(), 7); + assertEquals("[This, , is, \u00A0, a, , test]", testList.toString()); + testList = w.tokenize("This\rbreaks"); + assertEquals(3, testList.size()); + assertEquals("[This, \r, breaks]", testList.toString()); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizerTest.java new file mode 100644 index 0000000..eb6d17a --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizerTest.java @@ -0,0 +1,118 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tokenizers.cs; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +/** Sentence-splitting tests for {@link CzechSentenceTokenizer}; the sample text mixes German, Czech and English sentences. */ +public class CzechSentenceTokenizerTest extends TestCase { + + // tokenizer for which a single \n marks a paragraph (see setUp): + private SentenceTokenizer stokenizer = new CzechSentenceTokenizer(); + + // tokenizer for which only \n\n marks a paragraph (see setUp): + private SentenceTokenizer stokenizer2 = new CzechSentenceTokenizer(); + /** Sets the single-line-break paragraph flag: true on stokenizer, false on stokenizer2. */ + public final void setUp() { + stokenizer.setSingleLineBreaksMarksParagraph(true); + stokenizer2.setSingleLineBreaksMarksParagraph(false); + } + /** Each array lists the sentences expected as the result of splitting. */ + public final void testTokenize() { + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + testSplit(new String[] { "Dies ist ein Satz." }); + testSplit(new String[] { "Tři sta třicet tři stříbrných křepelek přeletělo přes stři sta třicet tři stříbrných střech." }); + testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." }); + testSplit(new String[] { "Ein Satz! ", "Noch einer." }); + testSplit(new String[] { "Ein Satz... ", "Noch einer." }); + testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." }); + + testSplit(new String[] { "Heute ist der 13.12.2004." }); + testSplit(new String[] { "Dnes je 16.3.2007." }); + testSplit(new String[] { "Tohle je 1. verze testu českého tokenizeru." }); + testSplit(new String[] { "Es geht am 24.09. los." }); + testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." 
}); + + testSplit(new String[] { "Das ist,, also ob es bla." }); + testSplit(new String[] { "Das ist es.. ", "So geht es weiter." }); + + testSplit(new String[] { "Das hier ist ein(!) Satz." }); + testSplit(new String[] { "Das hier ist ein(!!) Satz." }); + testSplit(new String[] { "Das hier ist ein(?) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + + testSplit(new String[] { + "„Česká sazba se oproti okolnímu světu v některých aspektech mírně liší”. ", "Bylo řečeno." }); + testSplit(new String[] { "„Jeď nejrychleji jak můžeš”, řekla mu tiše." }); + + // TODO: currently we do not distinguish whether a complete + // sentence follows the colon or not: + testSplit(new String[] { "Das war es: gar nichts." }); + testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." }); + + // incomplete sentences, need to work for on-the-fly checking of texts: + testSplit(new String[] { "Here's a" }); + testSplit(new String[] { "Here's a sentence. ", "And here's one that's not comp" }); + + // Tests taken from LanguageTool's SentenceSplitterTest.py: + testSplit(new String[] { "This is a sentence. " }); + testSplit(new String[] { "This is a sentence. ", "And this is another one." }); + testSplit(new String[] { "This is a sentence.", "Isn't it?", "Yes, it is." }); + + testSplit(new String[] { "Don't split strings like U. S. A. either." }); + testSplit(new String[] { "Don't split... ", "Well you know. ", "Here comes more text." }); + testSplit(new String[] { "Don't split... well you know. ", "Here comes more text." }); + testSplit(new String[] { "The \".\" should not be a delimiter in quotes." }); + testSplit(new String[] { "\"Here he comes!\" she said." }); + testSplit(new String[] { "\"Here he comes!\", she said." }); + testSplit(new String[] { "\"Here he comes.\" ", "But this is another sentence." }); + testSplit(new String[] { "\"Here he comes!\". ", "That's what he said." 
}); + testSplit(new String[] { "The sentence ends here. ", "(Another sentence.)" }); + // known to fail: + // testSplit(new String[]{"He won't. ", "Really."}); + testSplit(new String[] { "He won't go. ", "Really." }); + testSplit(new String[] { "He won't say no.", "Not really." }); + testSplit(new String[] { "He won't say No.", "Not really." }); + testSplit(new String[] { "This is it: a test." }); + // one/two returns = paragraph = new sentence (note: the \n\n case is asserted twice): + TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); + TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer); + TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2); + TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2); + // Missing space after sentence end: + testSplit(new String[] { "James is from the Ireland!", "He lives in Spain now." }); + // From the abbreviation list: + testSplit(new String[] { "V češtině jsou zkr. i pro jazyky, např. angl., maď. a jiné." }); + testSplit(new String[] { "Titul jako doc. RNDr. Adam Řezník, Ph.D. se může vyskytnout." }); + testSplit(new String[] { "Starověký Egypt vznikl okolo r. 3150 př.n.l. (anebo 3150 př.kr.). ", + "A zanikl v r. 31 př.kr." 
}); + } + /** Hands the expected sentences to TestTools.testSplit together with stokenizer. */ + private final void testSplit(final String[] sentences) { + TestTools.testSplit(sentences, stokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/da/DanishSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/da/DanishSRXSentenceTokenizerTest.java new file mode 100644 index 0000000..3151ed7 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/da/DanishSRXSentenceTokenizerTest.java @@ -0,0 +1,82 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2010 Esben Aaberg + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tokenizers.da; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; + +/** Sentence-splitting tests for an {@link SRXSentenceTokenizer} created with the language code "da". + * @author Esben Aaberg + */ +public class DanishSRXSentenceTokenizerTest extends TestCase { + + // tokenizer for which a single \n marks a paragraph (see setUp): + private final SRXSentenceTokenizer stokenizer = new SRXSentenceTokenizer("da"); + /** Turns on the single-line-break paragraph flag of stokenizer. */ + public void setUp() { + stokenizer.setSingleLineBreaksMarksParagraph(true); + } + /** Each array lists the sentences expected as the result of splitting. */ + public void testTokenize() { + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + testSplit(new String[] { "Dette er en sætning." }); + testSplit(new String[] { "Dette er en sætning. ", "Her er den næste." }); + testSplit(new String[] { "En sætning! ", "Yderlige en." }); + testSplit(new String[] { "En sætning... ", "Yderlige en." }); + testSplit(new String[] { "På hjemmesiden http://www.stavekontrolden.dk bygger vi stavekontrollen." }); + testSplit(new String[] { "Den 31.12. går ikke!" }); + testSplit(new String[] { "Den 3.12.2011 går ikke!" }); + testSplit(new String[] { "I det 18. og tidlige 19. århundrede hentede amerikansk kunst det meste af sin inspiration fra Europa." }); + + testSplit(new String[] { "Hendes Majestæt Dronning Margrethe II (Margrethe Alexandrine Þórhildur Ingrid, Danmarks dronning) (født 16. april 1940 på Amalienborg Slot) er siden 14. januar 1972 Danmarks regent." }); + testSplit(new String[] { "Hun har residensbolig i Christian IX's Palæ på Amalienborg Slot." }); + testSplit(new String[] { "Tronfølgeren ledte herefter statsrådsmøderne under Kong Frederik 9.'s fravær." 
}); + testSplit(new String[] { "Marie Hvidt, Frederik IV - En letsindig alvorsmand, Gads Forlag, 2004." }); + testSplit(new String[] { "Da vi første gang besøgte Restaurant Chr. IV, var vi de eneste gæster." }); + + testSplit(new String[] { "I dag er det den 25.12.2010." }); + testSplit(new String[] { "I dag er det d. 25.12.2010." }); + testSplit(new String[] { "I dag er den 13. december." }); + testSplit(new String[] { "Arrangementet starter ca. 17:30 i dag." }); + testSplit(new String[] { "Arrangementet starter ca. 17:30." }); + testSplit(new String[] { "Det er nævnt i punkt 3.6.4 Rygbelastende helkropsvibrationer." }); + + testSplit(new String[] { "Rent praktisk er det også lettest lige at mødes, så der kan udveksles nøgler og brugsanvisninger etc." }); + testSplit(new String[] { "Andre partier incl. borgerlige partier har deres særlige problemer: nogle samarbejder med apartheidstyret i Sydafrika, med NATO-landet Tyrkiet etc., men det skal så sandelig ikke begrunde en SF-offensiv for et samarbejde med et parti." }); + + testSplit(new String[] { "Hvad nu,, den bliver også." }); + testSplit(new String[] { "Det her er det.. ", "Og her fortsætter det." }); + + testSplit(new String[] { "Dette er en(!) sætning." }); + testSplit(new String[] { "Dette er en(!!) sætning." }); + testSplit(new String[] { "Dette er en(?) sætning." }); + testSplit(new String[] { "Dette er en(??) sætning." }); + testSplit(new String[] { "Dette er en(???) sætning." }); + testSplit(new String[] { "Militær værnepligt blev indført (traktaten krævede, at den tyske hær ikke oversteg 100.000 mand)." }); + + testSplit(new String[] { "Siden illustrerede hun \"Historierne om Regnar Lodbrog\" 1979 og \"Bjarkemål\" 1982 samt Poul Ørums \"Komedie i Florens\" 1990." 
}); + } + /** Helper used by testTokenize: hands the expected sentences to TestTools.testSplit together with stokenizer. */ + public void testSplit(String[] sentences) { + TestTools.testSplit(sentences, stokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSRXSentenceTokenizerTest.java new file mode 100644 index 0000000..179662d --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSRXSentenceTokenizerTest.java @@ -0,0 +1,108 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tokenizers.de; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; + +/** Sentence-splitting tests for an {@link SRXSentenceTokenizer} created with the language code "de". + * @author Daniel Naber + */ +public class GermanSRXSentenceTokenizerTest extends TestCase { + + // tokenizer for which a single \n marks a paragraph (see setUp): + private SRXSentenceTokenizer stokenizer = new SRXSentenceTokenizer("de"); + // tokenizer for which only \n\n marks a paragraph (configured in setUp, not used by the tests in this class): + private SRXSentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("de"); + /** Sets the single-line-break paragraph flag: true on stokenizer, false on stokenizer2. */ + public void setUp() { + stokenizer.setSingleLineBreaksMarksParagraph(true); + stokenizer2.setSingleLineBreaksMarksParagraph(false); + } + /** Each array lists the sentences expected as the result of splitting. */ + public void testTokenize() { + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + testSplit(new String[] { "Dies ist ein Satz." }); + testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." }); + testSplit(new String[] { "Ein Satz! ", "Noch einer." }); + testSplit(new String[] { "Ein Satz... ", "Noch einer." }); + testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." }); + testSplit(new String[] { "Natürliche Vererbungsprozesse prägten sich erst im 18. und frühen 19. Jahrhundert aus." }); + + testSplit(new String[] { "Friedrich I., auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich II., auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich IIXC., auch bekannt als Friedrich der Große." 
}); + testSplit(new String[] { "Friedrich II. öfter auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich VII. öfter auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich X. öfter auch bekannt als Friedrich der Zehnte." }); + + testSplit(new String[] { "Heute ist der 13.12.2004." }); + testSplit(new String[] { "Heute ist der 13. Dezember." }); + testSplit(new String[] { "Heute ist der 1. Januar." }); + testSplit(new String[] { "Es geht am 24.09. los." }); + testSplit(new String[] { "Es geht um ca. 17:00 los." }); + testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." }); + + testSplit(new String[] { "Diese Periode begann im 13. Jahrhundert und damit bla." }); + testSplit(new String[] { "Diese Periode begann im 13. oder 14. Jahrhundert und damit bla." }); + testSplit(new String[] { "Diese Periode datiert auf das 13. bis zum 14. Jahrhundert und damit bla." }); + + testSplit(new String[] { "Das gilt lt. aktuellem Plan." }); + testSplit(new String[] { "Orangen, Äpfel etc. werden gekauft." }); + + testSplit(new String[] { "Das ist,, also ob es bla." }); + testSplit(new String[] { "Das ist es.. ", "So geht es weiter." }); + + testSplit(new String[] { "Das hier ist ein(!) Satz." }); + testSplit(new String[] { "Das hier ist ein(!!) Satz." }); + testSplit(new String[] { "Das hier ist ein(?) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + + testSplit(new String[] { "»Der Papagei ist grün.« ", "Das kam so." }); + testSplit(new String[] { "»Der Papagei ist grün«, sagte er" }); + + // TODO: currently we do not distinguish whether a complete + // sentence follows the colon or not: + testSplit(new String[] { "Das war es: gar nichts." }); + testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." }); + + // Tests created as part of regression testing of SRX tokenizer. 
+ // They come from Schuld und Sühne (Crime and Punishment) book. + testSplit(new String[] { "schlug er die Richtung nach der K … brücke ein. " }); + testSplit(new String[] { "sobald ich es von einem Freunde zurückbekomme …« Er wurde verlegen und schwieg." }); + // testSplit(new String[] { "Verstehen Sie wohl? ", "… ", "Gestatten Sie mir noch die Frage" }); + testSplit(new String[] { "Er kannte eine Unmenge Quellen, aus denen er schöpfen konnte, d. h. natürlich, wo er durch Arbeit sich etwas verdienen konnte." }); + testSplit(new String[] { "Stimme am lautesten heraustönte …. ", "Sobald er auf der Straße war" }); +// testSplit(new String[] { "Aber nein doch, er hörte alles nur zu deutlich! ", "\n", "… ", "›Also, wenn's so ist" }); + testSplit(new String[] { "»Welche Wohnung?\" ", "»Die, wo wir arbeiten." }); + testSplit(new String[] { "»Nun also, wie ist's?« fragte Lushin und blickte sie fest an." }); +// testSplit(new String[] { "gezeigt hat.« ", "… ", "Hm! " }); + } + /** Helper used by testTokenize: hands the expected sentences to TestTools.testSplit together with stokenizer. */ + public void testSplit(String[] sentences) { + TestTools.testSplit(sentences, stokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizerTest.java new file mode 100644 index 0000000..6033df3 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizerTest.java @@ -0,0 +1,100 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tokenizers.de; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; + +/** Sentence-splitting tests for {@link GermanSentenceTokenizer}, including one constructed with an extra abbreviation list. + * @author Daniel Naber + */ +public class GermanSentenceTokenizerTest extends TestCase { + // tokenizer used by the testSplit helper; setUp makes a single \n mark a paragraph: + private GermanSentenceTokenizer sTokenizer = new GermanSentenceTokenizer(); + // accept "foo" as an abbreviation: + private GermanSentenceTokenizer sTokenizerWithFoo = new GermanSentenceTokenizer(new String[]{"foo"}); + /** Turns on the single-line-break paragraph flag of sTokenizer; sTokenizerWithFoo is left as constructed. */ + public void setUp() { + sTokenizer.setSingleLineBreaksMarksParagraph(true); + } + /** Each array lists the sentences expected as the result of splitting. */ + public void testTokenize() { + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + testSplit(new String[] { "Dies ist ein Satz." }); + testSplit(new String[] { "Dies ist ein Satz. ", "Noch einer." }); + testSplit(new String[] { "Ein Satz! ", "Noch einer." }); + testSplit(new String[] { "Ein Satz... ", "Noch einer." }); + testSplit(new String[] { "Unter http://www.test.de gibt es eine Website." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 31.1. datiert." }); + testSplit(new String[] { "Das Schreiben ist auf den 3.10.2000 datiert." }); + testSplit(new String[] { "Natürliche Vererbungsprozesse prägten sich erst im 18. und frühen 19. Jahrhundert aus." }); + + testSplit(new String[] { "Friedrich I., auch bekannt als Friedrich der Große." 
}); + testSplit(new String[] { "Friedrich II., auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich IIXC., auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich II. öfter auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich VII. öfter auch bekannt als Friedrich der Große." }); + testSplit(new String[] { "Friedrich X. öfter auch bekannt als Friedrich der Zehnte." }); + + testSplit(new String[] { "Heute ist der 13.12.2004." }); + testSplit(new String[] { "Heute ist der 13. Dezember." }); + testSplit(new String[] { "Heute ist der 1. Januar." }); + testSplit(new String[] { "Es geht am 24.09. los." }); + testSplit(new String[] { "Es geht um ca. 17:00 los." }); + testSplit(new String[] { "Das in Punkt 3.9.1 genannte Verhalten." }); + + testSplit(new String[] { "Diese Periode begann im 13. Jahrhundert und damit bla." }); + testSplit(new String[] { "Diese Periode begann im 13. oder 14. Jahrhundert und damit bla." }); + testSplit(new String[] { "Diese Periode datiert auf das 13. bis zum 14. Jahrhundert und damit bla." }); + + testSplit(new String[] { "Das gilt lt. aktuellem Plan." }); + testSplit(new String[] { "Orangen, Äpfel etc. werden gekauft." }); + + testSplit(new String[] { "Das ist,, also ob es bla." }); + testSplit(new String[] { "Das ist es.. ", "So geht es weiter." }); + + testSplit(new String[] { "Das hier ist ein(!) Satz." }); + testSplit(new String[] { "Das hier ist ein(!!) Satz." }); + testSplit(new String[] { "Das hier ist ein(?) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + testSplit(new String[] { "Das hier ist ein(???) Satz." }); + + testSplit(new String[] { "»Der Papagei ist grün.« ", "Das kam so." }); + testSplit(new String[] { "»Der Papagei ist grün«, sagte er" }); + + // incorrect sentences: + testSplit(new String[] { "Dies ist ein Satz. ", " und der nächste fängt klein an - das ist falsch." 
}); + + // TODO: currently we do not distinguish whether a complete + // sentence follows the colon or not: + testSplit(new String[] { "Das war es: gar nichts." }); + testSplit(new String[] { "Das war es: Dies ist ein neuer Satz." }); + + // test adding own list of abbreviations: + TestTools.testSplit(new String[] { "Hier ist foo. ", "keine Abk. im Text." }, sTokenizer); + TestTools.testSplit(new String[] { "Hier ist foo. eine Abk. im Text." }, sTokenizerWithFoo); + } + /** Hands the expected sentences to TestTools.testSplit together with sTokenizer. */ + private void testSplit(String[] sentences) { + TestTools.testSplit(sentences, sTokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchSRXSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchSRXSentenceTokenizerTest.java new file mode 100644 index 0000000..4ef4e78 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchSRXSentenceTokenizerTest.java @@ -0,0 +1,83 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tokenizers.nl; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.TestTools; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; + +/** Sentence-splitting tests for an {@link SRXSentenceTokenizer} created with the language code "nl". + * @author Daniel Naber + * @author Adapted by R. Baars for Dutch * + */ +public class DutchSRXSentenceTokenizerTest extends TestCase { + + private SRXSentenceTokenizer stokenizer = new SRXSentenceTokenizer("nl"); + /** Turns on the single-line-break paragraph flag of stokenizer. */ + public void setUp() { + stokenizer.setSingleLineBreaksMarksParagraph(true); + } + /** Each array lists the sentences expected as the result of splitting. */ + public void testTokenize() { + // NOTE: sentences here need to end with a space character so they + // have correct whitespace when appended: + testSplit(new String[] { "Dit is een zin." }); + testSplit(new String[] { "Dit is een zin. ", "Nog een." }); + testSplit(new String[] { "Een zin! ", "Nog een." }); + testSplit(new String[] { "Een zin... ", "Nog een." }); + testSplit(new String[] { "Op http://www.test.de vind je een website." }); + testSplit(new String[] { "De brief is op 3.10 gedateerd." }); + testSplit(new String[] { "De brief is op 31.1 gedateerd." }); + testSplit(new String[] { "De breif is op 3.10.2000 gedateerd." }); + + testSplit(new String[] { "Vandaag is het 13.12.2004." }); + testSplit(new String[] { "Op 24.09 begint het." }); + testSplit(new String[] { "Om 17:00 begint het." }); + testSplit(new String[] { "In paragraaf 3.9.1 is dat beschreven." }); + + testSplit(new String[] { "Januari jl. is dat vastgelegd." }); + testSplit(new String[] { "Appel en pruimen enz. werden gekocht." }); + testSplit(new String[] { "De afkorting n.v.t. betekent niet van toepassing." }); + + testSplit(new String[] { "Bla et al. blah blah." }); + + testSplit(new String[] { "Dat is,, of het is bla." 
}); + testSplit(new String[] { "Dat is het.. ", "Zo gaat het verder." }); + + testSplit(new String[] { "Dit hier is een(!) zin." }); + testSplit(new String[] { "Dit hier is een(!!) zin." }); + testSplit(new String[] { "Dit hier is een(?) zin." }); + testSplit(new String[] { "Dit hier is een(???) zin." }); + testSplit(new String[] { "Dit hier is een(???) zin." }); + + testSplit(new String[] { "»De papagaai is groen.« ", "Dat was hij al." }); + testSplit(new String[] { "»De papagaai is groen«, zei hij." }); + + testSplit(new String[] {"Als voetballer wordt hij nooit een prof. ", "Maar prof. N.A.W. Th.Ch. Janssen wordt dat wel."}); + + // TODO: sentence after a colon + testSplit(new String[] { "Dat was het: helemaal niets." }); + testSplit(new String[] { "Dat was het: het is een nieuwe zin." }); + } + /** Hands the expected sentences to TestTools.testSplit together with stokenizer. */ + private void testSplit(String[] sentences) { + TestTools.testSplit(sentences, stokenizer); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizerTest.java new file mode 100644 index 0000000..be2aab1 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizerTest.java @@ -0,0 +1,38 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tokenizers.nl; + +import junit.framework.TestCase; + +import java.util.List; + +public class DutchWordTokenizerTest extends TestCase { + + public void testTokenize() { + DutchWordTokenizer w = new DutchWordTokenizer(); + List<String> testList = w.tokenize("This is\u00A0a test"); + assertEquals(testList.size(), 7); + assertEquals("[This, , is, \u00A0, a, , test]", testList.toString()); + testList = w.tokenize("Bla bla oma's bla bla 'test"); + assertEquals(testList.size(), 12); + assertEquals("[Bla, , bla, , oma's, , bla, , bla, , ', test]", + testList.toString()); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/pl/PolishSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/pl/PolishSentenceTokenizerTest.java new file mode 100644 index 0000000..3fa11f5 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/pl/PolishSentenceTokenizerTest.java @@ -0,0 +1,152 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.pl;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+/**
+ * Sentence-splitting checks for the SRX tokenizer created with the
+ * language code "pl". Every case lists the sentences the tokenizer is
+ * expected to produce.
+ */
+public class PolishSentenceTokenizerTest extends TestCase {
+
+  // a single \n already marks a paragraph:
+  private final SentenceTokenizer stokenizer = new SRXSentenceTokenizer("pl");
+  // only \n\n marks a paragraph:
+  private final SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("pl");
+
+  /** Configures the line-break handling of the two tokenizers. */
+  @Override
+  public final void setUp() {
+    stokenizer.setSingleLineBreaksMarksParagraph(true);
+    stokenizer2.setSingleLineBreaksMarksParagraph(false);
+  }
+
+  /** Runs all split expectations; most go through {@code stokenizer2}. */
+  public final void testTokenize() {
+
+    testSplit("This is a sentence. ");
+
+    // NOTE: sentences that are followed by another one need to end with a
+    // space character so the whitespace is correct when they are appended:
+    testSplit("Dies ist ein Satz.");
+    testSplit("Dies ist ein Satz. ", "Noch einer.");
+    testSplit("Ein Satz! ", "Noch einer.");
+    testSplit("Ein Satz... ", "Noch einer.");
+    testSplit("Unter http://www.test.de gibt es eine Website.");
+    testSplit("To się wydarzyło 3.10.2000 i mam na to dowody.");
+
+    testSplit("To było 13.12 - nikt nie zapomni tego przemówienia.");
+    testSplit("Heute ist der 13.12.2004.");
+    testSplit("To jest np. ten debil spod jedynki.");
+    testSplit("To jest 1. wydanie.");
+    testSplit("Dziś jest 13. rocznica powstania wąchockiego.");
+
+    testSplit("Das in Punkt 3.9.1 genannte Verhalten.");
+
+    testSplit("To jest tzw. premier.");
+    testSplit("Jarek kupił sobie kurteczkę, tj. strój Marka.");
+
+    testSplit("Das ist,, also ob es bla.");
+    testSplit("Das ist es.. ", "So geht es weiter.");
+
+    testSplit("Das hier ist ein(!) Satz.");
+    testSplit("Das hier ist ein(!!) Satz.");
+    testSplit("Das hier ist ein(?) Satz.");
+    testSplit("Das hier ist ein(???) Satz.");
+    testSplit("Das hier ist ein(???) Satz.");
+
+    testSplit("„Prezydent jest niemądry”. ", "Tak wyszło.");
+    testSplit("„Prezydent jest niemądry”, powiedział premier");
+
+    // TODO: currently we do not distinguish whether a complete sentence
+    // follows the colon or not:
+    testSplit("Das war es: gar nichts.");
+    testSplit("Das war es: Dies ist ein neuer Satz.");
+
+    // incomplete sentences, need to work for on-the-fly checking of texts:
+    testSplit("Here's a");
+    testSplit("Here's a sentence. ", "And here's one that's not comp");
+
+    // Tests taken from LanguageTool's SentenceSplitterTest.py:
+    testSplit("This is a sentence. ");
+    testSplit("This is a sentence. ", "And this is another one.");
+    testSplit("This is a sentence.", "Isn't it?", "Yes, it is.");
+
+    testSplit("Don't split strings like U. S. A. either.");
+    testSplit("Don't split strings like U.S.A. either.");
+    testSplit("Don't split... ", "Well you know. ", "Here comes more text.");
+    testSplit("Don't split... well you know. ", "Here comes more text.");
+    testSplit("The \".\" should not be a delimiter in quotes.");
+    testSplit("\"Here he comes!\" she said.");
+    testSplit("\"Here he comes!\", she said.");
+    testSplit("\"Here he comes.\" ", "But this is another sentence.");
+    testSplit("\"Here he comes!\". ", "That's what he said.");
+    testSplit("The sentence ends here. ", "(Another sentence.)");
+    // known to fail:
+    // testSplit("He won't. ", "Really.");
+    testSplit("He won't go. ", "Really.");
+    testSplit("He won't say no.", "Not really.");
+    testSplit("This is it: a test.");
+    // one/two returns = paragraph = new sentence:
+    TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+    TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+    TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+    TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+    // Missing space after sentence end:
+    testSplit("James is from the Ireland!", "He lives in Spain now.");
+
+    // from user bug reports:
+    testSplit("Temperatura wody w systemie wynosi 30°C.",
+        "W skład obiegu otwartego wchodzi zbiornik i armatura.");
+    testSplit("Zabudowano kolumny o długości 45 m. ",
+        "Woda z ujęcia jest dostarczana do zakładu.");
+
+    // two-letter initials:
+    testSplit("Najlepszym polskim reżyserem był St. Różewicz. ",
+        "Chodzi o brata wielkiego poety.");
+
+    // From the abbreviation list:
+    testSplit("Ks. Jankowski jest prof. teologii.");
+    testSplit("To wydarzyło się w 1939 r.", "To był burzliwy rok.");
+    testSplit("Prezydent jest popierany przez 20 proc. społeczeństwa.");
+    testSplit(
+        "Moje wystąpienie ma na celu zmobilizowanie zarządu partii do działań, które umożliwią uzyskanie 40 proc.",
+        "Nie widzę dziś na scenie politycznej formacji, która lepiej by łączyła różne poglądy");
+    testSplit("To jest zmienna A.", "Zaś to jest zmienna B.");
+    // SKROTY_BEZ_KROPKI in ENDABREVLIST
+    testSplit("Mam już 20 mln.", "To powinno mi wystarczyć");
+    testSplit("Mam już 20 mln. buraków.");
+    // ellipsis
+    testSplit("Rytmem tej wiecznie przemijającej światowej egzystencji […] rytmem mesjańskiej natury jest szczęście.");
+    // sic!
+    testSplit("W gazecie napisali, że pasy (sic!) pogryzły człowieka.");
+    // Numbers with dots.
+    testSplit("Mam w magazynie dwie skrzynie LMD20. ",
+        "Jestem żołnierzem i wiem, jak można ich użyć");
+  }
+
+  /**
+   * Hands the expected sentences to {@code TestTools.testSplit} together
+   * with {@code stokenizer2}, the tokenizer that treats only a double line
+   * break as a paragraph.
+   *
+   * @param sentences the sentences the tokenizer is expected to return
+   */
+  private void testSplit(final String... sentences) {
+    TestTools.testSplit(sentences, stokenizer2);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianSentenceTokenizerTest.java
new file mode 100644
index 0000000..7e94ac7
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianSentenceTokenizerTest.java
@@ -0,0 +1,157 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.ro;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ * Sentence-splitting checks for the SRX tokenizer created with the
+ * language code "ro". Every case lists the sentences the tokenizer is
+ * expected to produce.
+ *
+ * @author Ionuț Păduraru
+ * @since 07.05.2009 10:28:59
+ */
+public class RomanianSentenceTokenizerTest extends TestCase {
+
+  // a single \n already marks a paragraph:
+  private final SentenceTokenizer stokenizer = new SRXSentenceTokenizer("ro");
+  // only \n\n marks a paragraph:
+  private final SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("ro");
+
+  /** Configures the line-break handling of the two tokenizers. */
+  @Override
+  public final void setUp() {
+    stokenizer.setSingleLineBreaksMarksParagraph(true);
+    stokenizer2.setSingleLineBreaksMarksParagraph(false);
+  }
+
+  /** Runs all split expectations; most go through {@code stokenizer2}. */
+  public final void testTokenize() {
+
+    testSplit("Aceasta este o propozitie fara diacritice. ");
+    testSplit("Aceasta este o fraza fara diacritice. ",
+        "Propozitia a doua, tot fara diacritice. ");
+    testSplit("Aceasta este o propoziție cu diacritice. ");
+    testSplit("Aceasta este o propoziție cu diacritice. ",
+        "Propoziția a doua, cu diacritice. ");
+
+    testSplit("O propoziție! ", "Și încă o propoziție. ");
+    testSplit("O propoziție... ", "Și încă o propoziție. ");
+    testSplit("La adresa http://www.archeus.ro găsiți resurse lingvistice. ");
+    testSplit("Data de 10.02.2009 nu trebuie să fie separator de propoziții. ");
+    testSplit("Astăzi suntem în data de 07.05.2007. ");
+    testSplit("Astăzi suntem în data de 07/05/2007. ");
+    testSplit("La anumărul (1) avem puține informații. ");
+    testSplit("To jest 1. wydanie.");
+    testSplit("La anumărul 1. avem puține informații. ");
+    testSplit("La anumărul 13. avem puține informații. ");
+    testSplit("La anumărul 1.3.3 avem puține informații. ");
+
+    testSplit("O singură propoziție... ");
+    testSplit("Colegii mei s-au dus... ");
+    testSplit("O singură propoziție!!! ");
+    testSplit("O singură propoziție??? ");
+
+    testSplit("Propoziții: una și alta. ");
+
+    testSplit("Domnu' a plecat. ");
+    testSplit("Profu' de istorie tre' să predea lecția. ");
+    testSplit("Sal'tare! ");
+    testSplit("'Neaţa! ");
+    testSplit("Deodat'apare un urs. ");
+    // accents
+    testSplit("A făcut două cópii. ");
+    testSplit("Ionel adúnă acum ceea ce Maria aduná înainte să vin eu. ");
+
+    // incomplete sentences, need to work for on-the-fly checking of texts:
+    testSplit("Domnu' a plecat");
+    testSplit("Domnu' a plecat. ", "El nu a plecat");
+
+    testSplit("Se pot întâlni și abrevieri precum S.U.A. "
+        + "sau B.C.R. într-o singură propoziție.");
+    testSplit("Se pot întâlni și abrevieri precum S.U.A. sau B.C.R. ",
+        "Aici sunt două propoziții.");
+    testSplit("Același lucru aici... ", "Aici sunt două propoziții.");
+    testSplit("Același lucru aici... dar cu o singură propoziție.");
+
+    testSplit("„O propoziție!” ", "O alta.");
+    testSplit("„O propoziție!!!” ", "O alta.");
+    testSplit("„O propoziție?” ", "O alta.");
+    testSplit("„O propoziție?!?” ", "O alta.");
+    testSplit("«O propoziție!» ", "O alta.");
+    testSplit("«O propoziție!!!» ", "O alta.");
+    testSplit("«O propoziție?» ", "O alta.");
+    testSplit("«O propoziție???» ", "O alta.");
+    testSplit("«O propoziție?!?» ", "O alta.");
+    testSplit("O primă propoziție. ", "(O alta.)");
+
+    testSplit("A venit domnu' Vasile. ");
+    testSplit("A venit domnu' acela. ");
+
+    // one/two returns = paragraph = new sentence:
+    TestTools.testSplit(new String[] { "A venit domnul\n\n", "Vasile." }, stokenizer2);
+    TestTools.testSplit(new String[] { "A venit domnul\n", "Vasile." }, stokenizer);
+    TestTools.testSplit(new String[] { "A venit domnu'\n\n", "Vasile." }, stokenizer2);
+    TestTools.testSplit(new String[] { "A venit domnu'\n", "Vasile." }, stokenizer);
+    // Missing space after sentence end:
+    testSplit("El este din România!", "Acum e plecat cu afaceri.");
+
+    testSplit("Temperatura este de 30°C.", "Este destul de cald.");
+    testSplit("A alergat 50 m. ", "Deja a obosit.");
+
+    // From the abbreviation list:
+    testSplit("Pentru dvs. vom face o excepție.");
+    testSplit("Pt. dumneavoastră vom face o excepție.");
+    testSplit("Pt. dvs. vom face o excepție.");
+    // d.p.d.v. / dpdv. = "din punct de vedere" (from the point of view of)
+    testSplit("A expus problema d.p.d.v. artistic.");
+    testSplit("A expus problema dpdv. artistic.");
+    // ș.a.m.d. / șamd. = "și așa mai departe" (and so on)
+    testSplit("Are mere, pere, șamd. dar nu are alune.");
+    testSplit("Are mere, pere, ș.a.m.d. dar nu are alune.");
+    testSplit("Are mere, pere, ș.a.m.d. ", "În schimb, nu are alune.");
+    // ş.c.l. = "și celelalte" (and the others)
+    testSplit("Are mere, pere, ş.c.l. dar nu are alune.");
+    testSplit("Are mere, pere, ş.c.l. ", "Nu are alune.");
+    // etc. = et cetera
+    testSplit("Are mere, pere, etc. dar nu are alune.");
+    testSplit("Are mere, pere, etc. ", "Nu are alune.");
+    // ș.a. = "și altele" (and others)
+    testSplit("Are mere, pere, ș.a. dar nu are alune.");
+    // M.Ap.N. - Ministerul Apărării Nationale
+    // there are 2 rules for this in segment.srx. Can this be done with only one rule?
+    testSplit("A fost și la M.Ap.N. dar nu l-au primit. ");
+    testSplit("A fost și la M.Ap.N. ", "Nu l-au primit. ");
+
+    // sic!
+    testSplit("Apo' da' tulai (sic!) că mult mai e de mers.");
+    testSplit("Apo' da' tulai(sic!) că mult mai e de mers.");
+
+    // […]
+    testSplit("Aici este o frază […] mult prescurtată.");
+    testSplit("Aici este o frază [...] mult prescurtată.");
+  }
+
+  /**
+   * Hands the expected sentences to {@code TestTools.testSplit} together
+   * with {@code stokenizer2}, the tokenizer that treats only a double line
+   * break as a paragraph.
+   *
+   * @param sentences the sentences the tokenizer is expected to return
+   */
+  private void testSplit(final String... sentences) {
+    TestTools.testSplit(sentences, stokenizer2);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizerTest.java
new file mode 100644
index 0000000..055a0ee
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizerTest.java
@@ -0,0 +1,122 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.ro;
+
+import java.util.List;
+
+import junit.framework.TestCase;
+
+/**
+ * Checks how {@link RomanianWordTokenizer} splits words, whitespace,
+ * quotation marks and other symbols into tokens.
+ */
+public class RomanianWordTokenizerTest extends TestCase {
+
+  /**
+   * Tokenizes a series of short inputs and compares both the number of
+   * tokens and the printed token list with the expected values.
+   * <p>
+   * Whitespace characters are tokens of their own (the token counts only
+   * add up that way), so a single-space token prints as ",  ," with two
+   * consecutive spaces in {@code List.toString()}.
+   */
+  public void testTokenize() {
+    // basic test - simple words, no diacritics
+    final RomanianWordTokenizer w = new RomanianWordTokenizer();
+    List<String> testList = w.tokenize("Aceaste mese sunt bune");
+    // JUnit convention throughout: expected value first, actual value second.
+    assertEquals(7, testList.size());
+    assertEquals("[Aceaste,  , mese,  , sunt,  , bune]", testList.toString());
+
+    // basic test - simple words, with diacritics
+    testList = w.tokenize("Această carte este frumoasă");
+    assertEquals(7, testList.size());
+    assertEquals("[Această,  , carte,  , este,  , frumoasă]", testList.toString());
+
+    // test for "-"
+    testList = w.tokenize("nu-ți doresc");
+    assertEquals(5, testList.size());
+    assertEquals("[nu, -, ți,  , doresc]",
+        testList.toString());
+
+    // test for "„"
+    testList = w.tokenize("zicea „merge");
+    assertEquals(4, testList.size());
+    assertEquals("[zicea,  , „, merge]",
+        testList.toString());
+
+    // test for "„" with white space
+    testList = w.tokenize("zicea „ merge");
+    assertEquals(5, testList.size());
+    assertEquals("[zicea,  , „,  , merge]",
+        testList.toString());
+
+    // test for "”"
+    testList = w.tokenize("zicea merge”");
+    assertEquals(4, testList.size());
+    assertEquals("[zicea,  , merge, ”]",
+        testList.toString());
+
+    // test for "„" and "”"
+    testList = w.tokenize("zicea „merge bine”");
+    assertEquals(7, testList.size());
+    assertEquals("[zicea,  , „, merge,  , bine, ”]",
+        testList.toString());
+
+    // hyphenated form "ți-am"
+    testList = w.tokenize("ți-am");
+    assertEquals(3, testList.size());
+    assertEquals("[ți, -, am]",
+        testList.toString());
+
+    // test for "«" and "»"
+    testList = w.tokenize("zicea «merge bine»");
+    assertEquals(7, testList.size());
+    assertEquals("[zicea,  , «, merge,  , bine, »]",
+        testList.toString());
+    // test for "<" and ">"
+    testList = w.tokenize("zicea <<merge bine>>");
+    assertEquals(9, testList.size());
+    assertEquals("[zicea,  , <, <, merge,  , bine, >, >]",
+        testList.toString());
+    // test for "%"
+    testList = w.tokenize("avea 15% apă");
+    assertEquals(6, testList.size());
+    assertEquals("[avea,  , 15, %,  , apă]",
+        testList.toString());
+    // test for "°"
+    testList = w.tokenize("are 30°C");
+    assertEquals(5, testList.size());
+    assertEquals("[are,  , 30, °, C]",
+        testList.toString());
+    // test for "="
+    testList = w.tokenize("fructe=mere");
+    assertEquals(3, testList.size());
+    assertEquals("[fructe, =, mere]",
+        testList.toString());
+    // test for "|"
+    testList = w.tokenize("pere|mere");
+    assertEquals(3, testList.size());
+    assertEquals("[pere, |, mere]",
+        testList.toString());
+    // test for "\n"
+    testList = w.tokenize("pere\nmere");
+    assertEquals(3, testList.size());
+    assertEquals("[pere, \n, mere]",
+        testList.toString());
+    // test for "\r"
+    testList = w.tokenize("pere\rmere");
+    assertEquals(3, testList.size());
+    assertEquals("[pere, \r, mere]",
+        testList.toString());
+    // test for "\n\r"
+    testList = w.tokenize("pere\n\rmere");
+    assertEquals(4, testList.size());
+    assertEquals("[pere, \n, \r, mere]",
+        testList.toString());
+  }
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/sk/SlovakSentenceTokenizerTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/sk/SlovakSentenceTokenizerTest.java
new file mode 100644
index 0000000..cc7d101
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tokenizers/sk/SlovakSentenceTokenizerTest.java
@@ -0,0 +1,143 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the
GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.sk;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+/**
+ * Sentence-splitting checks for the SRX tokenizer created with the
+ * language code "sk". Every case lists the sentences the tokenizer is
+ * expected to produce.
+ */
+public class SlovakSentenceTokenizerTest extends TestCase {
+
+  // a single \n already marks a paragraph:
+  private final SentenceTokenizer stokenizer = new SRXSentenceTokenizer("sk");
+  // only \n\n marks a paragraph:
+  private final SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("sk");
+
+  /** Configures the line-break handling of the two tokenizers. */
+  @Override
+  public final void setUp() {
+    stokenizer.setSingleLineBreaksMarksParagraph(true);
+    stokenizer2.setSingleLineBreaksMarksParagraph(false);
+  }
+
+  /** Runs all split expectations; most go through {@code stokenizer2}. */
+  public final void testTokenize() {
+
+    testSplit("This is a sentence. ");
+
+    // NOTE: sentences that are followed by another one need to end with a
+    // space character so the whitespace is correct when they are appended:
+    testSplit("Dies ist ein Satz.");
+    testSplit("Dies ist ein Satz. ", "Noch einer.");
+    testSplit("Ein Satz! ", "Noch einer.");
+    testSplit("Ein Satz... ", "Noch einer.");
+    testSplit("Unter http://www.test.de gibt es eine Website.");
+
+    testSplit("Das ist,, also ob es bla.");
+    testSplit("Das ist es.. ", "So geht es weiter.");
+
+    testSplit("Das hier ist ein(!) Satz.");
+    testSplit("Das hier ist ein(!!) Satz.");
+    testSplit("Das hier ist ein(?) Satz.");
+    testSplit("Das hier ist ein(???) Satz.");
+    testSplit("Das hier ist ein(???) Satz.");
+
+    // TODO: currently we do not distinguish whether a complete sentence
+    // follows the colon or not:
+    testSplit("Das war es: gar nichts.");
+    testSplit("Das war es: Dies ist ein neuer Satz.");
+
+    // incomplete sentences, need to work for on-the-fly checking of texts:
+    testSplit("Here's a");
+    testSplit("Here's a sentence. ", "And here's one that's not comp");
+
+    testSplit("„Prezydent jest niemądry”. ", "Tak wyszło.");
+    testSplit("„Prezydent jest niemądry”, powiedział premier");
+
+    testSplit("Das Schreiben ist auf den 3.10. datiert.");
+    testSplit("Das Schreiben ist auf den 31.1. datiert.");
+    testSplit("Das Schreiben ist auf den 3.10.2000 datiert.");
+    testSplit("Toto 2. vydanie bolo rozobrané za 1,5 roka.");
+    testSplit("Festival Bažant Pohoda slávi svoje 10. výročie.");
+    testSplit("Dlho odkladané parlamentné voľby v Angole sa uskutočnia 5. septembra.");
+    testSplit("Das in Punkt 3.9.1 genannte Verhalten.");
+
+    // From the abbreviation list:
+    testSplit("Aké sú skutočné príčiny tzv. transformačných príznakov?");
+    testSplit("Aké príplatky zamestnancovi (napr. za nadčas) stanovuje Zákonník práce?");
+    testSplit("Počas neprítomnosti zastupuje MUDr. Marianna Krupšová.");
+    testSplit("Staroveký Egypt vznikol okolo r. 3150 p.n.l. (tzn. 3150 pred Kr.). ",
+        "A zanikol v r. 31 pr. Kr.");
+
+    // from user bug reports:
+    testSplit("Temperatura wody w systemie wynosi 30°C.",
+        "W skład obiegu otwartego wchodzi zbiornik i armatura.");
+    testSplit("Zabudowano kolumny o długości 45 m. ",
+        "Woda z ujęcia jest dostarczana do zakładu.");
+
+    // two-letter initials:
+    testSplit("Najlepszym polskim reżyserem był St. Różewicz. ",
+        "Chodzi o brata wielkiego poety.");
+    testSplit("Nore M. hrozí za podvod 10 až 15 rokov.");
+    testSplit("To jest zmienna A.", "Zaś to jest zmienna B.");
+    // Numbers with dots.
+    testSplit("Mam w magazynie dwie skrzynie LMD20. ",
+        "Jestem żołnierzem i wiem, jak można ich użyć");
+    // ellipsis
+    testSplit("Rytmem tej wiecznie przemijającej światowej egzystencji […] rytmem mesjańskiej natury jest szczęście.");
+
+
+    // Tests taken from LanguageTool's SentenceSplitterTest.py:
+    testSplit("This is a sentence. ");
+    testSplit("This is a sentence. ", "And this is another one.");
+    testSplit("This is a sentence.", "Isn't it?", "Yes, it is.");
+
+    testSplit("Don't split strings like U. S. A. either.");
+    testSplit("Don't split strings like U.S.A. either.");
+    testSplit("Don't split... ", "Well you know. ", "Here comes more text.");
+    testSplit("Don't split... well you know. ", "Here comes more text.");
+    testSplit("The \".\" should not be a delimiter in quotes.");
+    testSplit("\"Here he comes!\" she said.");
+    testSplit("\"Here he comes!\", she said.");
+    testSplit("\"Here he comes.\" ", "But this is another sentence.");
+    testSplit("\"Here he comes!\". ", "That's what he said.");
+    testSplit("The sentence ends here. ", "(Another sentence.)");
+    // known to fail:
+    // testSplit("He won't. ", "Really.");
+    testSplit("He won't go. ", "Really.");
+    testSplit("He won't say no.", "Not really.");
+    testSplit("This is it: a test.");
+    // one/two returns = paragraph = new sentence:
+    TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+    TestTools.testSplit(new String[] { "He won't\n", "Really." }, stokenizer);
+    TestTools.testSplit(new String[] { "He won't\n\n", "Really." }, stokenizer2);
+    TestTools.testSplit(new String[] { "He won't\nReally." }, stokenizer2);
+    // Missing space after sentence end:
+    testSplit("James is from the Ireland!", "He lives in Spain now.");
+  }
+
+  /**
+   * Hands the expected sentences to {@code TestTools.testSplit} together
+   * with {@code stokenizer2}, the tokenizer that treats only a double line
+   * break as a paragraph.
+   *
+   * @param sentences the sentences the tokenizer is expected to return
+   */
+  private void testSplit(final String... sentences) {
+    TestTools.testSplit(sentences, stokenizer2);
+  }
+
+}
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tools/StringToolsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/StringToolsTest.java
new file mode 100644
index 0000000..8e24005
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/StringToolsTest.java
@@ -0,0 +1,263 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.tools; + +import de.danielnaber.languagetool.rules.RuleMatch; +import de.danielnaber.languagetool.rules.en.AvsAnRule; +import junit.framework.TestCase; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +import de.danielnaber.languagetool.Language; + +/** + * @author Daniel Naber + */ +public class StringToolsTest extends TestCase { + + public void testAssureSet() { + String s = ""; + try { + StringTools.assureSet(s, "varName"); + fail(); + } catch (IllegalArgumentException e) { + // expected exception + } + s = " \t"; + try { + StringTools.assureSet(s, "varName"); + fail(); + } catch (IllegalArgumentException e) { + // expected exception + } + s = null; + try { + StringTools.assureSet(s, "varName"); + fail(); + } catch (NullPointerException e) { + // expected exception + } + s = "foo"; + StringTools.assureSet(s, "varName"); + } + + public void testReadFile() throws IOException { + final String content = StringTools.readFile(new FileInputStream("src/test/testinput.txt"), "utf-8"); + assertEquals("one\ntwo\nöäüß\n", content); + } + + public void testIsAllUppercase() { + assertTrue(StringTools.isAllUppercase("A")); + assertTrue(StringTools.isAllUppercase("ABC")); + assertTrue(StringTools.isAllUppercase("ASV-EDR")); + assertTrue(StringTools.isAllUppercase("ASV-ÖÄÜ")); + assertTrue(StringTools.isAllUppercase("")); + + assertFalse(StringTools.isAllUppercase("ß")); + assertFalse(StringTools.isAllUppercase("AAAAAAAAAAAAq")); + assertFalse(StringTools.isAllUppercase("a")); + assertFalse(StringTools.isAllUppercase("abc")); + } + + public void testIsMixedCase() { + assertTrue(StringTools.isMixedCase("AbC")); + assertTrue(StringTools.isMixedCase("MixedCase")); + 
assertTrue(StringTools.isMixedCase("iPod")); + assertTrue(StringTools.isMixedCase("AbCdE")); + + assertFalse(StringTools.isMixedCase("")); + assertFalse(StringTools.isMixedCase("ABC")); + assertFalse(StringTools.isMixedCase("abc")); + assertFalse(StringTools.isMixedCase("!")); + assertFalse(StringTools.isMixedCase("Word")); + } + + public void testIsCapitalizedWord() { + assertTrue(StringTools.isCapitalizedWord("Abc")); + assertTrue(StringTools.isCapitalizedWord("Uppercase")); + assertTrue(StringTools.isCapitalizedWord("Ipod")); + + assertFalse(StringTools.isCapitalizedWord("")); + assertFalse(StringTools.isCapitalizedWord("ABC")); + assertFalse(StringTools.isCapitalizedWord("abc")); + assertFalse(StringTools.isCapitalizedWord("!")); + assertFalse(StringTools.isCapitalizedWord("wOrD")); + } + + public void testStartsWithUppercase() { + assertTrue(StringTools.startsWithUppercase("A")); + assertTrue(StringTools.startsWithUppercase("ÄÖ")); + + assertFalse(StringTools.startsWithUppercase("")); + assertFalse(StringTools.startsWithUppercase("ß")); + assertFalse(StringTools.startsWithUppercase("-")); + } + + public void testUppercaseFirstChar() { + assertEquals("", StringTools.uppercaseFirstChar("")); + assertEquals("A", StringTools.uppercaseFirstChar("A")); + assertEquals("Öäü", StringTools.uppercaseFirstChar("öäü")); + assertEquals("ßa", StringTools.uppercaseFirstChar("ßa")); + assertEquals("'Test'", StringTools.uppercaseFirstChar("'test'")); + assertEquals("''Test", StringTools.uppercaseFirstChar("''test")); + assertEquals("''T", StringTools.uppercaseFirstChar("''t")); + assertEquals("'''", StringTools.uppercaseFirstChar("'''")); + } + + public void testLowercaseFirstChar() { + assertEquals("", StringTools.lowercaseFirstChar("")); + assertEquals("a", StringTools.lowercaseFirstChar("A")); + assertEquals("öäü", StringTools.lowercaseFirstChar("Öäü")); + assertEquals("ßa", StringTools.lowercaseFirstChar("ßa")); + assertEquals("'test'", 
StringTools.lowercaseFirstChar("'Test'")); + assertEquals("''test", StringTools.lowercaseFirstChar("''Test")); + assertEquals("''t", StringTools.lowercaseFirstChar("''T")); + assertEquals("'''", StringTools.lowercaseFirstChar("'''")); + } + + public void testReaderToString() throws IOException { + final String str = StringTools.readerToString(new StringReader("bla\nöäü")); + assertEquals("bla\nöäü", str); + final StringBuilder longStr = new StringBuilder(); + for (int i = 0; i < 4000; i++) { + longStr.append("x"); + } + longStr.append("1234567"); + assertEquals(4007, longStr.length()); + final String str2 = StringTools.readerToString(new StringReader(longStr.toString())); + assertEquals(longStr.toString(), str2); + } + + public void testEscapeXMLandHTML() { + assertEquals("!ä"<>&&", StringTools.escapeXML("!ä\"<>&&")); + assertEquals("!ä"<>&&", StringTools.escapeHTML("!ä\"<>&&")); + } + + public void testRuleMatchesToXML() throws IOException { + final List<RuleMatch> matches = new ArrayList<RuleMatch>(); + final String text = "This is an test sentence. 
Here's another sentence with more text."; + final RuleMatch match = new RuleMatch(new AvsAnRule(null), 8, 10, "myMessage"); + match.setColumn(99); + match.setEndColumn(100); + match.setLine(44); + match.setEndLine(45); + matches.add(match); + final String xml = StringTools.ruleMatchesToXML(matches, text, 5, StringTools.XmlPrintMode.NORMAL_XML); + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + + "<matches>\n" + + "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" replacements=\"\" context=\"...s is an test...\" contextoffset=\"8\" errorlength=\"2\"/>\n" + + "</matches>\n", xml); + } + + /** Checks that listToString joins the elements with the given delimiter, adds no trailing delimiter and does not escape an element that equals the delimiter. */ public void testListToString() { + final List<String> list = new ArrayList<String>(); + list.add("foo"); + list.add("bar"); + list.add(","); + assertEquals("foo,bar,,", StringTools.listToString(list, ",")); + assertEquals("foo\tbar\t,", StringTools.listToString(list, "\t")); + } + + public void testGetContext() { + final String input = "This is a test sentence. 
Here's another sentence with more text."; + final String result = StringTools.getContext(8, 14, input, 5); + assertEquals("...s is a test sent...\n ^^^^^^ ", result); + } + + /** Checks trimWhitespace: a null argument must raise NullPointerException, and spaces, tabs and line feeds are removed from the other inputs as the expected values show. */ public void testTrimWhitespace() { + try { + assertEquals(null, StringTools.trimWhitespace(null)); + fail(); + } catch (NullPointerException e) { + // expected + } + assertEquals("", StringTools.trimWhitespace("")); + assertEquals("", StringTools.trimWhitespace(" ")); + assertEquals("XXY", StringTools.trimWhitespace(" \nXX\t Y")); + // TODO: make this work assertEquals("XXY", StringTools.trimWhitespace(" \r\nXX\t Y")); + assertEquals("word", StringTools.trimWhitespace("word")); + } + + /** Checks addSpace: a single space is expected for a word, an empty string for a comma in English and French, and a single space for colon and semicolon in French. */ public void testAddSpace() { + assertEquals(" ", StringTools.addSpace("word", Language.ENGLISH)); + assertEquals("", StringTools.addSpace(",", Language.ENGLISH)); + assertEquals("", StringTools.addSpace(",", Language.FRENCH)); + assertEquals("", StringTools.addSpace(",", Language.ENGLISH)); + assertEquals(" ", StringTools.addSpace(":", Language.FRENCH)); + assertEquals("", StringTools.addSpace(",", Language.ENGLISH)); + assertEquals(" ", StringTools.addSpace(";", Language.FRENCH)); + } + + /** Checks getLabel: a single ampersand is dropped and a doubled ampersand yields one literal ampersand. */ public void testGetLabel() { + assertEquals("This is a Label", StringTools.getLabel("This is a &Label")); + assertEquals("Bits & Pieces", StringTools.getLabel("Bits && Pieces")); + } + + /** Checks getOOoLabel: a single ampersand is turned into a tilde. */ public void testGetOOoLabel() { + assertEquals("This is a ~Label", StringTools.getOOoLabel("This is a &Label")); + /* NOTE(review): this assertion calls getLabel, not getOOoLabel, so the doubled-ampersand case of getOOoLabel is not covered here; looks like a copy-paste slip, confirm the expected getOOoLabel result before switching the call. */ assertEquals("Bits & Pieces", StringTools.getLabel("Bits && Pieces")); + } + + /** Checks getMnemonic: the character after a single ampersand is returned, a doubled ampersand is not treated as a marker, and the NUL character is returned when no marker is present. */ public void testGetMnemonic() { + assertEquals('F', StringTools.getMnemonic("&File")); + assertEquals('O', StringTools.getMnemonic("&OK")); + assertEquals('\u0000', + StringTools.getMnemonic("File && String operations")); + assertEquals('O', + StringTools.getMnemonic("File && String &Operations")); + } + + public void testIsWhitespace() { + assertEquals(true, StringTools.isWhitespace(" ")); + assertEquals(true, StringTools.isWhitespace("\t")); + 
assertEquals(true, StringTools.isWhitespace("\u2002")); + //non-breaking space is not a whitespace + assertEquals(false, StringTools.isWhitespace("\u00a0")); + assertEquals(false, StringTools.isWhitespace("abc")); + //non-breaking OOo field + assertEquals(false, StringTools.isWhitespace("\\u02")); + assertEquals(false, StringTools.isWhitespace("\u0001")); + } + + /** Checks isPositiveNumber: true for the digit 3, false for a letter. */ public void testIsPositiveNumber() { + assertEquals(true, StringTools.isPositiveNumber('3')); + assertEquals(false, StringTools.isPositiveNumber('a')); + } + + /** Checks isEmpty: true for the empty string and for null, false for a non-empty string. */ public void testIsEmpty() { + assertEquals(true, StringTools.isEmpty("")); + assertEquals(true, StringTools.isEmpty(null)); + assertEquals(false, StringTools.isEmpty("a")); + } + + /** Checks filterXML: tags such as b and em are removed while plain text and doubled angle brackets are kept. */ public void testFilterXML() { + assertEquals("test", StringTools.filterXML("test")); + assertEquals("<<test>>", StringTools.filterXML("<<test>>")); + assertEquals("test", StringTools.filterXML("<b>test</b>")); + assertEquals("A sentence with a test", StringTools.filterXML("A sentence with a <em>test</em>")); + } + + /** Checks asString: null stays null and a non-null value comes back with equal content. The second assertion now goes through asString instead of comparing a literal with itself. */ public void testAsString() { + assertNull(StringTools.asString(null)); + assertEquals("foo!", StringTools.asString("foo!")); + } + +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tools/ToolsTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/ToolsTest.java new file mode 100644 index 0000000..91c47ac --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/ToolsTest.java @@ -0,0 +1,105 @@ +package de.danielnaber.languagetool.tools; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.List; + +import javax.xml.parsers.ParserConfigurationException; + +import org.xml.sax.SAXException; + +import junit.framework.TestCase; +import de.danielnaber.languagetool.JLanguageTool; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.bitext.BitextRule; + +public class ToolsTest extends TestCase { + + private ByteArrayOutputStream out; + private 
ByteArrayOutputStream err; + private PrintStream stdout; + private PrintStream stderr; + + /** Remembers the real System.out and System.err and redirects both into in-memory buffers so the tests can inspect what Tools prints. */ @Override public void setUp() throws Exception { + super.setUp(); + this.stdout = System.out; + this.stderr = System.err; + this.out = new ByteArrayOutputStream(); + this.err = new ByteArrayOutputStream(); + System.setOut(new PrintStream(this.out)); + System.setErr(new PrintStream(this.err)); + } + + /** Puts the real System.out and System.err back before delegating, so the streams are restored even if the superclass tearDown throws. */ @Override public void tearDown() throws Exception { + System.setOut(this.stdout); + System.setErr(this.stderr); + super.tearDown(); + } + + /** Checks checkText on Polish text: a correct sentence yields no match and output starting with Time:, a repeated word yields one match whose output mentions WORD_REPEAT_RULE. */ public void testCheck() throws IOException, ParserConfigurationException, SAXException { + final JLanguageTool tool = new JLanguageTool(Language.POLISH); + tool.activateDefaultPatternRules(); + tool.activateDefaultFalseFriendRules(); + + int matches = Tools.checkText("To jest całkowicie prawidłowe zdanie.", tool); + String output = new String(this.out.toByteArray()); + assertEquals(0, output.indexOf("Time:")); + assertEquals(0, matches); + + matches = Tools.checkText("To jest jest problem.", tool); + output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Rule ID: WORD_REPEAT_RULE") != -1); + assertEquals(1, matches); + } + + public void testCorrect() throws IOException, ParserConfigurationException, SAXException { + JLanguageTool tool = new JLanguageTool(Language.POLISH); + tool.activateDefaultPatternRules(); + tool.activateDefaultFalseFriendRules(); + + String correct = Tools.correctText("To jest całkowicie prawidłowe zdanie.", tool); + assertEquals("To jest całkowicie prawidłowe zdanie.", correct); + correct = Tools.correctText("To jest jest problem.", tool); + assertEquals("To jest problem.", correct); + + // more sentences, need to apply more suggestions > 1 in subsequent sentences + correct = Tools.correctText("To jest jest problem. Ale to już już nie jest problem.", tool); + assertEquals("To jest problem. Ale to już nie jest problem.", correct); + correct = Tools.correctText("To jest jest problem. Ale to już już nie jest problem. 
Tak sie nie robi. W tym zdaniu brakuje przecinka bo go zapomniałem.", tool); + assertEquals("To jest problem. Ale to już nie jest problem. Tak się nie robi. W tym zdaniu brakuje przecinka, bo go zapomniałem.", correct); + + //now English + tool = new JLanguageTool(Language.ENGLISH); + tool.activateDefaultPatternRules(); + tool.activateDefaultFalseFriendRules(); + + assertEquals("This is a test.", Tools.correctText("This is an test.", tool)); + + } + + /** Checks checkBitext for an English source and a Polish target: the first sentence pair yields no match and output starting with Time:, the second pair yields one match whose output mentions rule ACTUAL. */ public void testBitextCheck() throws IOException, ParserConfigurationException, SAXException { + final JLanguageTool srcTool = new JLanguageTool(Language.ENGLISH); + final JLanguageTool trgTool = new JLanguageTool(Language.POLISH); + trgTool.activateDefaultPatternRules(); + + final List<BitextRule> rules = Tools.getBitextRules(Language.ENGLISH, Language.POLISH); + + int matches = Tools.checkBitext( + "This is a perfectly good sentence.", + "To jest całkowicie prawidłowe zdanie.", srcTool, trgTool, rules, + false, StringTools.XmlPrintMode.NORMAL_XML); + String output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Time:") == 0); + assertEquals(0, matches); + + matches = Tools.checkBitext( + "This is not actual.", + "To nie jest aktualne.", + srcTool, trgTool, + rules, false, StringTools.XmlPrintMode.NORMAL_XML); + output = new String(this.out.toByteArray()); + assertTrue(output.indexOf("Rule ID: ACTUAL") != -1); + assertEquals(1, matches); + } +} diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/tools/UnsyncStackTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/UnsyncStackTest.java new file mode 100644 index 0000000..8aa79c2 --- /dev/null +++ b/JLanguageTool/src/test/de/danielnaber/languagetool/tools/UnsyncStackTest.java @@ -0,0 +1,39 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.tools; + +import junit.framework.TestCase; + +/** Push, peek, pop and empty checks for UnsyncStack. */ public class UnsyncStackTest extends TestCase { + + /** Creates the test case with the given JUnit test name. */ public UnsyncStackTest(String name) { + super(name); + } + + /** Walks one element through the stack: empty at first, the pushed value is visible via peek and returned by pop, and the stack is empty again afterwards. */ public void testStack() { + final UnsyncStack<String> strings = new UnsyncStack<String>(); + assertTrue(strings.empty()); + strings.push("test"); + assertEquals("test", strings.peek()); + assertFalse(strings.empty()); + assertEquals("test", strings.pop()); + assertTrue(strings.empty()); + } +} diff --git a/JLanguageTool/src/test/testinput.txt b/JLanguageTool/src/test/testinput.txt new file mode 100644 index 0000000..a0d7d78 --- /dev/null +++ b/JLanguageTool/src/test/testinput.txt @@ -0,0 +1,3 @@ +one +two +öäüß |