summary | refs | log | tree | commit | diff | stats
path: root/JLanguageTool/src/java/de/danielnaber/languagetool/language
diff options
context:
space:
mode:
author: Arno Teigseth <arno@teigseth.no>, 2011-02-05 08:48:27 +0000
committer: Arno Teigseth <arno@teigseth.no>, 2011-02-05 08:48:27 +0000
commit: 4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce (patch)
tree: 7af736540eca93034428a975bd850e709fbbe2e5 /JLanguageTool/src/java/de/danielnaber/languagetool/language
parent: ecaee85ab5984ebadd56721c295dc26b3335f7ce (diff)
download: grammar-norwegian-master.tar.gz
grammar-norwegian-master.tar.bz2
grammar-norwegian-master.tar.xz
added more files, to complete languagetool upload (HEAD, master)
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/language')
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java 72
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java 104
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java 91
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java 63
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java 73
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java 78
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java 60
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java 99
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java 103
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java 72
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java 90
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java 86
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java 87
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java 86
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java 74
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java 80
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java 70
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java 86
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java 116
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java 112
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java 42
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java 114
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java 93
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java 75
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java 94
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java 75
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java 73
27 files changed, 2268 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java
new file mode 100644
index 0000000..fb1df60
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java
@@ -0,0 +1,72 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.be.BelarusianTagger;
+
+/**
+ * Belarusian language declarations.
+ *
+ * Copyright (C) 2010 Alex Buloichik (alex73mail@gmail.com)
+ */
+public class Belarusian extends Language {
+
+ private static final String[] COUNTRIES = { "BY" };
+
+ private final Tagger tagger = new BelarusianTagger();
+
+ public Locale getLocale() {
+ return new Locale(getShortName());
+ }
+
+ public String getName() {
+ return "Belarusian";
+ }
+
+ public String getShortName() {
+ return "be";
+ }
+
+ public String[] getCountryVariants() {
+ return COUNTRIES;
+ }
+
+ public Tagger getTagger() {
+ return tagger;
+ }
+
+ public Contributor[] getMaintainers() {
+ return new Contributor[] { new Contributor("Alex Buloichik") };
+ }
+
+ public Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ ids.add("UPPERCASE_SENTENCE_START");
+ ids.add("WHITESPACE_RULE");
+ return ids;
+ }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java
new file mode 100644
index 0000000..77d79ae
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java
@@ -0,0 +1,104 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+/*import de.danielnaber.languagetool.synthesis.en.EnglishSynthesizer; */
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+/*import de.danielnaber.languagetool.tagging.disambiguation.rules.en.EnglishRuleDisambiguator;*/
+/*import de.danielnaber.languagetool.tagging.en.EnglishTagger;*/
+import de.danielnaber.languagetool.tagging.nb.BokmalTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+/*import de.danielnaber.languagetool.tokenizers.en.EnglishWordTokenizer;*/
+
+public class Bokmal extends Language {
+
+ private final Tagger tagger = new BokmalTagger();
+// private final Tokenizer wordTokenizer = new BokmalWordTokenizer();
+ private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("nb");
+// private final Synthesizer synthesizer = new BokmalSynthesizer();
+// private final Disambiguator disambiguator = new BokmalRuleDisambiguator();
+
+ private static final String[] COUNTRIES = {"NO"};
+
+ public final Locale getLocale() {
+ return new Locale(getShortName());
+ }
+
+ public final SentenceTokenizer getSentenceTokenizer() {
+ return sentenceTokenizer;
+ }
+
+ public final String getName() {
+ return "Bokmal";
+ }
+
+ public final String getShortName() {
+ return "nb";
+ }
+
+ public final String[] getCountryVariants() {
+ return COUNTRIES;
+ }
+
+ public final Tagger getTagger() {
+ return tagger;
+ }
+
+/* public final Tokenizer getWordTokenizer() {
+ return wordTokenizer;
+ }
+
+ public final Synthesizer getSynthesizer() {
+ return synthesizer;
+ }
+
+ public final Disambiguator getDisambiguator() {
+ return disambiguator;
+ }*/
+
+
+ public final Contributor[] getMaintainers() {
+ return new Contributor[] {new Contributor("Arno Teigseth")/*,
+ new Contributor("Arno Teigseth")*/};
+ }
+
+ public final Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ ids.add("EN_UNPAIRED_BRACKETS");
+ ids.add("UPPERCASE_SENTENCE_START");
+ ids.add("WORD_REPEAT_RULE");
+ ids.add("WHITESPACE_RULE");
+ // English-specific rule IDs, kept commented out for Bokmal:
+// ids.add("EN_A_VS_AN");
+// ids.add("EN_COMPOUNDS");
+ return ids;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java
new file mode 100644
index 0000000..4e0eb67
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java
@@ -0,0 +1,91 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.ca.CatalanSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.ca.CatalanTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.rules.ca.CastellanismesReplaceRule;
+import de.danielnaber.languagetool.rules.ca.AccentuacioReplaceRule;
+
+public class Catalan extends Language {
+
+ private final Tagger tagger = new CatalanTagger();
+ private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ca");
+ private final Synthesizer synthesizer = new CatalanSynthesizer();
+// private CastellanismesReplaceRule castella = new CastellanismesReplaceRule();
+
+ private static final String[] COUNTRIES = {
+ "ES"
+ };
+
+ public Locale getLocale() {
+ return new Locale(getShortName());
+ }
+
+ public String getName() {
+ return "Catalan";
+ }
+
+ @Override
+ public String[] getCountryVariants() {
+ return COUNTRIES;
+ }
+
+ public String getShortName() {
+ return "ca";
+ }
+
+ public Contributor[] getMaintainers() {
+ return new Contributor[] {new Contributor("Ricard Roca")};
+ }
+
+ public Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ ids.add("UNPAIRED_BRACKETS");
+ ids.add("UPPERCASE_SENTENCE_START");
+ ids.add("WHITESPACE_RULE");
+ ids.add(CastellanismesReplaceRule.CATALAN_CASTELLANISMES_REPLACE_RULE);
+ ids.add(AccentuacioReplaceRule.CATALAN_ACCENTUACIO_REPLACE_RULE);
+ return ids;
+ }
+
+ public final Tagger getTagger() {
+ return tagger;
+ }
+
+ public final Synthesizer getSynthesizer() {
+ return synthesizer;
+ }
+
+ public final SentenceTokenizer getSentenceTokenizer() {
+ return sentenceTokenizer;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java
new file mode 100644
index 0000000..e38d635
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java
@@ -0,0 +1,63 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+/**
+ * A person that contributed rules or code to LanguageTool.
+ *
+ * @author Daniel Naber
+ */
+public class Contributor {
+
+ private final String name;
+ private String remark;
+ private String url;
+
+ Contributor(String name) {
+ if (name == null) {
+ throw new NullPointerException("name cannot be null");
+ }
+ this.name = name;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public final String toString() {
+ return getName();
+ }
+
+ public String getRemark() {
+ return remark;
+ }
+
+ public void setRemark(final String remark) {
+ this.remark = remark;
+ }
+
+ public String getUrl() {
+ return url;
+ }
+
+ public void setUrl(final String url) {
+ this.url = url;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java
new file mode 100644
index 0000000..d3154d7
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java
@@ -0,0 +1,73 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.cs.CzechTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.cs.CzechSentenceTokenizer;
+
+public class Czech extends Language {
+
+ private final Tagger tagger = new CzechTagger();
+ private final SentenceTokenizer sentenceTokenizer = new CzechSentenceTokenizer();
+
+ private static final String[] COUNTRIES = {"CZ"};
+
+ public Locale getLocale() {
+ return new Locale(getShortName());
+ }
+
+ public String getName() {
+ return "Czech";
+ }
+
+ public String getShortName() {
+ return "cs";
+ }
+
+ public String[] getCountryVariants() {
+ return COUNTRIES;
+ }
+
+ public Tagger getTagger() {
+ return tagger;
+ }
+
+ public SentenceTokenizer getSentenceTokenizer() {
+ return sentenceTokenizer;
+ }
+
+ public Contributor[] getMaintainers() {
+ return new Contributor[] {new Contributor("Jozef Ličko")};
+ }
+
+ public Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ return ids;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java
new file mode 100644
index 0000000..d114c40
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java
@@ -0,0 +1,78 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.da.DanishTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+public class Danish extends Language {
+
+ private final Tagger tagger = new DanishTagger();
+ private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("da");
+
+ private static final String[] COUNTRIES = {"DK"};
+
+ public final Locale getLocale() {
+ return new Locale(getShortName());
+ }
+
+ public final String getName() {
+ return "Danish";
+ }
+
+ public final String getShortName() {
+ return "da";
+ }
+
+ public final String[] getCountryVariants() {
+ return COUNTRIES;
+ }
+
+ public final Tagger getTagger() {
+ return tagger;
+ }
+
+ public SentenceTokenizer getSentenceTokenizer() {
+ return sentenceTokenizer;
+ }
+
+ public final Contributor[] getMaintainers() {
+ return new Contributor[] {new Contributor("Esben Aaberg")};
+ }
+
+ public final Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ ids.add("UNPAIRED_BRACKETS"); // correction for genitive apostrophes eg. "Lis' hund" made in UnpairedQuotesBracketsRule
+ ids.add("UPPERCASE_SENTENCE_START"); // abbreviation exceptions, done in DanishSentenceTokenizer
+ // "WORD_REPEAT_RULE" implemented in grammar.xml
+ ids.add("WHITESPACE_RULE");
+ // no Danish-specific rule IDs are added here
+ return ids;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java
new file mode 100644
index 0000000..ab4284b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java
@@ -0,0 +1,60 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.xx.DemoTagger;
+
+public class Demo extends Language {
+
+ private final Tagger tagger = new DemoTagger();
+
+ public Locale getLocale() {
+ return new Locale("en");
+ }
+
+ public String getName() {
+ return "Testlanguage";
+ }
+
+ public String getShortName() {
+ return "xx";
+ }
+
+ public String[] getCountryVariants() {
+ return new String[] {"XX"};
+ }
+
+ public Tagger getTagger() {
+ return tagger;
+ }
+
+ public Contributor[] getMaintainers() {
+ return null;
+ }
+
+ public Set<String> getRelevantRuleIDs() {
+ return null;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java
new file mode 100644
index 0000000..0670736
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java
@@ -0,0 +1,99 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.nl.DutchSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.nl.DutchRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.nl.DutchTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.nl.DutchWordTokenizer;
+
+public class Dutch extends Language {
+
+ private final Tagger tagger = new DutchTagger();
+ private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("nl");
+ private final Synthesizer synthesizer = new DutchSynthesizer();
+ private final Disambiguator disambiguator = new DutchRuleDisambiguator();
+ private final Tokenizer wdTokenizer = new DutchWordTokenizer();
+
+ private static final String[] COUNTRIES = { "NL", "BE" };
+
+ public final Locale getLocale() {
+ return new Locale(getShortName());
+ }
+
+ public final String getName() {
+ return "Dutch";
+ }
+
+ public final String getShortName() {
+ return "nl";
+ }
+
+ public final String[] getCountryVariants() {
+ return COUNTRIES;
+ }
+
+ public final Tagger getTagger() {
+ return tagger;
+ }
+
+ public final Synthesizer getSynthesizer() {
+ return synthesizer;
+ }
+
+ public final SentenceTokenizer getSentenceTokenizer() {
+ return sentenceTokenizer;
+ }
+
+ public final Tokenizer getWordTokenizer() {
+ return wdTokenizer;
+ }
+
+ public final Disambiguator getDisambiguator() {
+ return disambiguator;
+ }
+
+ public final Contributor[] getMaintainers() {
+ final Contributor contributor = new Contributor("Ruud Baars");
+ contributor.setUrl("http://www.opentaal.org");
+ return new Contributor[] { contributor };
+ }
+
+ public final Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ ids.add("UNPAIRED_BRACKETS");
+ ids.add("UPPERCASE_SENTENCE_START");
+ ids.add("WHITESPACE_RULE");
+ return ids;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java
new file mode 100644
index 0000000..0bf16e8
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java
@@ -0,0 +1,103 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.en.EnglishSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.en.EnglishRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.en.EnglishTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.en.EnglishWordTokenizer;
+
+public class English extends Language {
+
+ private final Tagger tagger = new EnglishTagger();
+ private final Tokenizer wordTokenizer = new EnglishWordTokenizer();
+ private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("en");
+ private final Synthesizer synthesizer = new EnglishSynthesizer();
+ private final Disambiguator disambiguator = new EnglishRuleDisambiguator();
+
+ private static final String[] COUNTRIES = {"GB", "US", "AU", "CA", "NZ", "ZA" };
+
+ public final Locale getLocale() {
+ return new Locale(getShortName());
+ }
+
+ public final SentenceTokenizer getSentenceTokenizer() {
+ return sentenceTokenizer;
+ }
+
+ public final String getName() {
+ return "English";
+ }
+
+ public final String getShortName() {
+ return "en";
+ }
+
+ public final String[] getCountryVariants() {
+ return COUNTRIES;
+ }
+
+ public final Tagger getTagger() {
+ return tagger;
+ }
+
+ public final Tokenizer getWordTokenizer() {
+ return wordTokenizer;
+ }
+
+ public final Synthesizer getSynthesizer() {
+ return synthesizer;
+ }
+
+ public final Disambiguator getDisambiguator() {
+ return disambiguator;
+ }
+
+
+ public final Contributor[] getMaintainers() {
+ return new Contributor[] {new Contributor("Marcin Miłkowski"),
+ new Contributor("Daniel Naber")};
+ }
+
+ public final Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ ids.add("EN_UNPAIRED_BRACKETS");
+ ids.add("UPPERCASE_SENTENCE_START");
+ ids.add("WORD_REPEAT_RULE");
+ ids.add("WHITESPACE_RULE");
+ // specific to English:
+ ids.add("EN_A_VS_AN");
+ ids.add("EN_COMPOUNDS");
+ return ids;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java
new file mode 100644
index 0000000..0e48d98
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java
@@ -0,0 +1,72 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.eo.EsperantoTagger;
+
+public class Esperanto extends Language {
+
+ private final Tagger tagger = new EsperantoTagger();
+
+ public Locale getLocale() {
+ return new Locale("eo");
+ }
+
+ public String getName() {
+ return "Esperanto";
+ }
+
+ public String getShortName() {
+ return "eo";
+ }
+
+ public String[] getCountryVariants() {
+ /* return "ANY" country code as a "country-less" placeholder for OOo: */
+ return new String[] {"ANY"};
+ }
+
+ public Tagger getTagger() {
+ return tagger;
+ }
+
+ public Contributor[] getMaintainers() {
+ return new Contributor[] {
+ new Contributor("Dominique Pellé")
+ };
+ }
+
+ public Set<String> getRelevantRuleIDs() {
+ final Set<String> ids = new HashSet<String>();
+ ids.add("COMMA_PARENTHESIS_WHITESPACE");
+ ids.add("DOUBLE_PUNCTUATION");
+ ids.add("UNPAIRED_BRACKETS");
+ ids.add("UPPERCASE_SENTENCE_START");
+ ids.add("WORD_REPEAT_RULE");
+ ids.add("WHITESPACE_RULE");
+ ids.add("FRENCH_WHITESPACE");
+ return ids;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java
new file mode 100644
index 0000000..96dc5fc
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java
@@ -0,0 +1,90 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.Unifier;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.fr.FrenchRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.fr.FrenchTagger;
+
+/**
+ * French language definition: supplies the locale, names, country variants,
+ * tagger, rule-based disambiguator, unifier, maintainers and the IDs of the
+ * rules relevant for French.
+ */
+public class French extends Language {
+
+  private final Tagger tagger = new FrenchTagger();
+  private final Disambiguator disambiguator = new FrenchRuleDisambiguator();
+  private static final Unifier FRENCH_UNIFIER = new Unifier();
+
+  /**
+   * Country codes offered as variants of French. The empty entry presumably
+   * stands for "no specific country" - TODO confirm against the callers.
+   * "HT" (Haiti) replaces the former "HI", which is not an assigned
+   * ISO 3166-1 alpha-2 code.
+   */
+  private static final String[] COUNTRIES = {"FR", "", "BE", "CH", "CA",
+    "LU", "MC", "CM", "CI", "HT", "ML", "SN", "CD", "MA", "RE"
+  };
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String getName() {
+    return "French";
+  }
+
+  @Override
+  public String getShortName() {
+    return "fr";
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  @Override
+  public Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  /** @return the single unifier instance shared by all French objects */
+  public Unifier getUnifier() {
+    return FRENCH_UNIFIER;
+  }
+
+  /** @return the maintainers; Hugo Voisard carries the remark "2006-2007" */
+  @Override
+  public Contributor[] getMaintainers() {
+    final Contributor hVoisard = new Contributor("Hugo Voisard");
+    hVoisard.setRemark("2006-2007");
+    return new Contributor[] {
+      new Contributor("Agnes Souque"),
+      hVoisard
+    };
+  }
+
+  /** @return a new, mutable set holding the rule IDs listed for French */
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ids = new HashSet<String>();
+    ids.add("COMMA_PARENTHESIS_WHITESPACE");
+    ids.add("DOUBLE_PUNCTUATION");
+    ids.add("UNPAIRED_BRACKETS");
+    ids.add("UPPERCASE_SENTENCE_START");
+    ids.add("WHITESPACE_RULE");
+    ids.add("FRENCH_WHITESPACE");
+    return ids;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java
new file mode 100644
index 0000000..abd2158
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java
@@ -0,0 +1,86 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.gl.GalicianTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.gl.GalicianWordTokenizer;
+
+/**
+ * Galician language definition: supplies the locale, names, country variant,
+ * tagger, word and sentence tokenizers, maintainer and relevant rule IDs.
+ */
+public class Galician extends Language {
+
+  private static final String[] COUNTRIES = {"ES"};
+
+  private final Tagger tagger = new GalicianTagger();
+  private final Tokenizer wordTokenizer = new GalicianWordTokenizer();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("gl");
+
+  @Override
+  public final String getName() {
+    return "Galician";
+  }
+
+  @Override
+  public final String getShortName() {
+    return "gl";
+  }
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public final Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wordTokenizer;
+  }
+
+  @Override
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  /** @return the single maintainer, with the project URL attached */
+  @Override
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Susana Sotelo Docío");
+    maintainer.setUrl("http://www.g11n.net/languagetool-gl");
+    return new Contributor[] { maintainer };
+  }
+
+  /** @return a new, mutable set holding the rule IDs listed for Galician */
+  @Override
+  public final Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String id : relevant) {
+      ruleIds.add(id);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java
new file mode 100644
index 0000000..2df4cd4
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java
@@ -0,0 +1,87 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.de.GermanTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ * German language definition: supplies the locale, names, country variants,
+ * tagger, sentence tokenizer, maintainer and relevant rule IDs.
+ */
+public class German extends Language {
+
+  private static final String[] COUNTRIES = {
+    "DE", "CH", "AT", "LU", "LI", "BE"
+  };
+
+  private final Tagger tagger = new GermanTagger();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("de");
+
+  @Override
+  public String getName() {
+    return "German";
+  }
+
+  @Override
+  public String getShortName() {
+    return "de";
+  }
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  @Override
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  @Override
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Daniel Naber");
+    return new Contributor[] {maintainer};
+  }
+
+  /**
+   * @return a new, mutable set holding the generic rule IDs followed by the
+   *         German-specific ones
+   */
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final String[] generic = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "GERMAN_WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+    };
+    // specific to German:
+    final String[] germanOnly = {
+      "DE_AGREEMENT",
+      "DE_CASE",
+      "DE_COMPOUNDS",
+      "DE_DASH",
+      "DE_WORD_COHERENCY",
+      "DE_WIEDER_VS_WIDER"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String id : generic) {
+      ruleIds.add(id);
+    }
+    for (final String id : germanOnly) {
+      ruleIds.add(id);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java
new file mode 100644
index 0000000..e48fb6a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java
@@ -0,0 +1,86 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.language;
+
+/**
+ * @author Anton Karl Ingason
+ */
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.xx.DemoTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ * Icelandic language definition: supplies the locale, names, country variant,
+ * a {@link DemoTagger}, sentence tokenizer, maintainer and relevant rule IDs.
+ */
+public class Icelandic extends Language {
+
+  private static final String[] COUNTRIES = { "IS" };
+
+  private final Tagger tagger = new DemoTagger();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("is");
+
+  @Override
+  public String getName() {
+    return "Icelandic";
+  }
+
+  @Override
+  public String getShortName() {
+    return "is";
+  }
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  @Override
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  @Override
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Anton Karl Ingason");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a new, mutable set holding the rule IDs listed for Icelandic */
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String id : relevant) {
+      ruleIds.add(id);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java
new file mode 100644
index 0000000..986b7f5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java
@@ -0,0 +1,74 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.it.ItalianTagger;
+
+/**
+ * Italian language definition: supplies the locale, names, country variants,
+ * tagger, maintainer and relevant rule IDs.
+ */
+public class Italian extends Language {
+
+  private static final String[] COUNTRIES = {"IT", "CH"};
+
+  private final Tagger tagger = new ItalianTagger();
+
+  @Override
+  public String getName() {
+    return "Italian";
+  }
+
+  @Override
+  public String getShortName() {
+    return "it";
+  }
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  @Override
+  public Contributor[] getMaintainers() {
+    return new Contributor[] { new Contributor("Paolo Bianchini") };
+  }
+
+  /** @return a new, mutable set holding the rule IDs listed for Italian */
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String id : relevant) {
+      ruleIds.add(id);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java
new file mode 100644
index 0000000..201a8b5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.io.File;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Create a language by specifying the language's XML rule file.
+ * This class only offers a static factory and cannot be instantiated.
+ *
+ * @author Daniel Naber
+ */
+public class LanguageBuilder {
+
+  /** Extension every rule file name must end with. */
+  private static final String XML_SUFFIX = ".xml";
+
+  private LanguageBuilder() {
+    // static factory only
+  }
+
+  /**
+   * Takes an XML file named <tt>rules-xx-language.xml</tt>,
+   * e.g. <tt>rules-de-German.xml</tt> and builds
+   * a Language object for that language.
+   *
+   * <p>The returned language reports <tt>xx</tt> as its short name, the
+   * <tt>language</tt> part (without the trailing <tt>.xml</tt>) as its name,
+   * a single empty country variant, and the absolute path of
+   * <code>file</code> as its rule file name. Its maintainers and relevant
+   * rule IDs are <code>null</code>.
+   *
+   * @param file the rule file; only its name is inspected here
+   * @return a new Language backed by the given rule file
+   * @throws NullPointerException if <code>file</code> is null
+   * @throws RuleFilenameException if the file name does not end with
+   *         <tt>.xml</tt>, does not consist of exactly three parts separated
+   *         by "-", does not start with "rules", or has a language code that
+   *         is not exactly two characters long
+   */
+  public static Language makeLanguage(final File file) {
+    if (file == null) {
+      throw new NullPointerException("file argument cannot be null");
+    }
+    final String fileName = file.getName();
+    if (!fileName.endsWith(XML_SUFFIX)) {
+      throw new RuleFilenameException(file);
+    }
+    final String[] parts = fileName.split("-");
+    if (parts.length != 3 || !parts[0].equals("rules") || parts[1].length() != 2) {
+      throw new RuleFilenameException(file);
+    }
+    final String shortName = parts[1];
+    // The file name ends with ".xml" and that suffix contains no "-", so the
+    // last part is guaranteed to end with it. Cut exactly that suffix instead
+    // of removing every ".xml" occurrence inside the language name.
+    final String languageName =
+        parts[2].substring(0, parts[2].length() - XML_SUFFIX.length());
+
+    final Language newLanguage = new Language() {
+      public Locale getLocale() {
+        return new Locale(getShortName());
+      }
+      public Contributor[] getMaintainers() {
+        return null;
+      }
+      public String getShortName() {
+        return shortName;
+      }
+      public String[] getCountryVariants() {
+        return new String[] {""};
+      }
+      public String getName() {
+        return languageName;
+      }
+      public Set<String> getRelevantRuleIDs() {
+        return null;
+      }
+      public String getRuleFileName() {
+        return file.getAbsolutePath();
+      }
+    };
+    return newLanguage;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java
new file mode 100644
index 0000000..6401195
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java
@@ -0,0 +1,70 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.xx.DemoTagger;
+
+/**
+ * Lithuanian language definition: supplies the locale, names, country
+ * variant, a {@link DemoTagger}, maintainer and relevant rule IDs.
+ */
+public class Lithuanian extends Language {
+
+  private static final String[] COUNTRIES = {
+    "LT"
+  };
+
+  // Created once and reused, as the other language classes do with their
+  // taggers, instead of allocating a new tagger on every getTagger() call.
+  private final Tagger tagger = new DemoTagger();
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String getName() {
+    return "Lithuanian";
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public String getShortName() {
+    return "lt";
+  }
+
+  /** @return the tagger instance owned by this language object */
+  @Override
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  @Override
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Mantas Kriaučiūnas")};
+  }
+
+  /** @return a new, mutable set holding the rule IDs listed for Lithuanian */
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ids = new HashSet<String>();
+    ids.add("COMMA_PARENTHESIS_WHITESPACE");
+    ids.add("DOUBLE_PUNCTUATION");
+    ids.add("UNPAIRED_BRACKETS");
+    ids.add("UPPERCASE_SENTENCE_START");
+    ids.add("WHITESPACE_RULE");
+    return ids;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java
new file mode 100644
index 0000000..f15ca5c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java
@@ -0,0 +1,86 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.ml.MalayalamTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.ml.MalayalamWordTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+
+/**
+ * Malayalam language definition: supplies the locale, names, country variant,
+ * tagger, word and sentence tokenizers, maintainer and relevant rule IDs.
+ */
+public class Malayalam extends Language {
+
+  private static final String[] COUNTRIES = {"IN"};
+
+  private final Tagger tagger = new MalayalamTagger();
+  private final Tokenizer wordTokenizer = new MalayalamWordTokenizer();
+  // NOTE(review): the sentence tokenizer is created with "en", not "ml" -
+  // possibly deliberate (no Malayalam segmentation rules?), please confirm.
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("en");
+
+  @Override
+  public final String getName() {
+    return "Malayalam";
+  }
+
+  @Override
+  public final String getShortName() {
+    return "ml";
+  }
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public final Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wordTokenizer;
+  }
+
+  @Override
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  @Override
+  public final Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Jithesh.V.S");
+    return new Contributor[] {maintainer};
+  }
+
+  /**
+   * @return a new, mutable set holding the rule IDs listed for Malayalam;
+   *         no Malayalam-specific IDs are registered yet
+   */
+  @Override
+  public final Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String id : relevant) {
+      ruleIds.add(id);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java
new file mode 100644
index 0000000..13b4faf
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java
@@ -0,0 +1,116 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.Unifier;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.pl.PolishSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.pl.PolishHybridDisambiguator;
+import de.danielnaber.languagetool.tagging.pl.PolishTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+/**
+ * Polish language definition: supplies the locale, names, country variant,
+ * tagger, sentence tokenizer, disambiguator, synthesizer, two unifiers,
+ * maintainer and relevant rule IDs.
+ */
+public class Polish extends Language {
+
+  private static final String[] COUNTRIES = {"PL"};
+
+  // Shared by all Polish instances.
+  private static final Unifier POLISH_UNIFIER = new Unifier();
+  private static final Unifier POLISH_DISAMB_UNIFIER = new Unifier();
+
+  private final Tagger tagger = new PolishTagger();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("pl");
+  private final Disambiguator disambiguator = new PolishHybridDisambiguator();
+  private final Synthesizer synthesizer = new PolishSynthesizer();
+
+  @Override
+  public String getName() {
+    return "Polish";
+  }
+
+  @Override
+  public String getShortName() {
+    return "pl";
+  }
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  @Override
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  @Override
+  public Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  @Override
+  public Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  /** @return the unifier instance shared by all Polish objects */
+  public Unifier getUnifier() {
+    return POLISH_UNIFIER;
+  }
+
+  /** @return the separate, equally shared unifier kept for disambiguation */
+  public Unifier getDisambiguationUnifier() {
+    return POLISH_DISAMB_UNIFIER;
+  }
+
+  @Override
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Marcin Miłkowski");
+    return new Contributor[] {maintainer};
+  }
+
+  /**
+   * @return a new, mutable set holding the generic rule IDs followed by the
+   *         Polish-specific ones
+   */
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final String[] generic = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+    };
+    // specific to Polish:
+    final String[] polishOnly = {
+      "PL_UNPAIRED_BRACKETS",
+      "PL_WORD_REPEAT",
+      "PL_COMPOUNDS",
+      "PL_SIMPLE_REPLACE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String id : generic) {
+      ruleIds.add(id);
+    }
+    for (final String id : polishOnly) {
+      ruleIds.add(id);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java
new file mode 100644
index 0000000..96d6a6b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java
@@ -0,0 +1,112 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.ro.CompoundRule;
+import de.danielnaber.languagetool.rules.ro.SimpleReplaceRule;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.ro.RomanianSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.ro.RomanianRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.ro.RomanianTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.ro.RomanianWordTokenizer;
+
+/**
+ * Romanian language definition: supplies the locale, names, country variant,
+ * tagger, synthesizer, rule-based disambiguator, word and sentence
+ * tokenizers, maintainer and relevant rule IDs.
+ *
+ * @author Ionuț Păduraru
+ * @since 24.02.2009 22:18:21
+ */
+public class Romanian extends Language {
+
+  private static final String[] COUNTRIES = { "RO" };
+
+  private final Tagger tagger = new RomanianTagger();
+  private final Synthesizer synthesizer = new RomanianSynthesizer();
+  private final Disambiguator disambiguator = new RomanianRuleDisambiguator();
+  private final Tokenizer wdTokenizer = new RomanianWordTokenizer();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ro");
+
+  /** @return a newly created locale built from {@link #getShortName()} */
+  @Override
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String getName() {
+    return "Romanian";
+  }
+
+  @Override
+  public String getShortName() {
+    return "ro";
+  }
+
+  /** @return the shared country-code array (not a copy) */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  /** @return the single maintainer, with the project URL attached */
+  @Override
+  public Contributor[] getMaintainers() {
+    final Contributor contributor = new Contributor("Ionuț Păduraru");
+    contributor.setUrl("http://www.archeus.ro");
+    return new Contributor[] { contributor };
+  }
+
+  /**
+   * @return a new, mutable set holding the generic rule IDs followed by the
+   *         Romanian-specific ones
+   */
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ids = new HashSet<String>();
+    // Each ID is registered exactly once; an earlier second add of
+    // "UPPERCASE_SENTENCE_START" was redundant, since this is a set.
+    ids.add("COMMA_PARENTHESIS_WHITESPACE");
+    ids.add("DOUBLE_PUNCTUATION");
+    ids.add("UPPERCASE_SENTENCE_START");
+    ids.add("WHITESPACE_RULE");
+    ids.add("UNPAIRED_BRACKETS");
+    ids.add("WORD_REPEAT_RULE");
+    // specific to Romanian
+    ids.add(SimpleReplaceRule.ROMANIAN_SIMPLE_REPLACE_RULE);
+    ids.add(CompoundRule.ROMANIAN_COMPOUND_RULE);
+    return ids;
+  }
+
+  @Override
+  public final Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  @Override
+  public final Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wdTokenizer;
+  }
+
+  @Override
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java
new file mode 100644
index 0000000..715bdc9
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.io.File;
+
+/**
+ * Signals that the name of an external rule file does not follow the
+ * required <tt>rules-&lt;xx&gt;-&lt;lang&gt;.xml</tt> pattern.
+ *
+ * @author Daniel Naber
+ */
+public class RuleFilenameException extends RuntimeException {
+
+  private static final long serialVersionUID = 6642163394764392897L;
+
+  /**
+   * @param file the offending rule file; its plain name is appended to the
+   *        message, so it must not be null
+   */
+  public RuleFilenameException(File file) {
+    super(describe(file));
+  }
+
+  /** Builds the explanatory message, ending with the file's current name. */
+  private static String describe(final File file) {
+    final StringBuilder message = new StringBuilder();
+    message.append("Rule file must be named rules-<xx>-<lang>.xml (<xx> = language code, ");
+    message.append("<lang> = language name),\n");
+    message.append("for example: rules-en-English.xml\n");
+    message.append("Current name: ").append(file.getName());
+    return message.toString();
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java
new file mode 100644
index 0000000..8491d65
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java
@@ -0,0 +1,114 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.Unifier;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.ru.RussianSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.ru.RussianRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.ru.RussianTagger;
+//import de.danielnaber.languagetool.tokenizers.Tokenizer;
+//import de.danielnaber.languagetool.tokenizers.ru.RussianWordTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; // new Tokenizer
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+// import de.danielnaber.languagetool.tokenizers.ru.RussianSentenceTokenizer; // old Tokenizer
+
+
+/**
+ * Russian language definition (tagger, disambiguator, synthesizer, tokenizer).
+ */
+public class Russian extends Language {
+
+  private static final String[] COUNTRIES = {"RU"};
+  private static final Unifier RUSSIAN_UNIFIER = new Unifier();
+
+  private final Tagger tagger = new RussianTagger();
+  private final Disambiguator disambiguator = new RussianRuleDisambiguator();
+  private final Synthesizer synthesizer = new RussianSynthesizer();
+  // SRX-based ("new") sentence tokenizer; the old RussianSentenceTokenizer and
+  // the RussianWordTokenizer are not used here.
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ru");
+
+  /** @return a locale created from the short language code */
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String getName() {
+    return "Russian";
+  }
+
+  public String getShortName() {
+    return "ru";
+  }
+
+  /** @return the shared country-code array itself, not a copy */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  public Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  public Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  /** @return the unifier shared by all instances of this class */
+  public Unifier getUnifier() {
+    return RUSSIAN_UNIFIER;
+  }
+
+  /** @return a freshly allocated array holding the single maintainer */
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Yakov Reztsov");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a new, modifiable set with the ids of the rules relevant for Russian */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE", "DOUBLE_PUNCTUATION", "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE", "WHITESPACE_RULE",
+      "RU_UNPAIRED_BRACKETS", "RU_COMPOUNDS", "RU_SIMPLE_REPLACE" // specific to Russian
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+}
\ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java
new file mode 100644
index 0000000..eecb54b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java
@@ -0,0 +1,93 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.sk.SlovakTagger;
+import de.danielnaber.languagetool.synthesis.sk.SlovakSynthesizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/** Slovak language definition (sentence tokenizer, tagger, synthesizer). */
+public class Slovak extends Language {
+
+  private static final String[] COUNTRIES = {"SK"};
+
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("sk");
+  private final Tagger tagger = new SlovakTagger();
+  private final Synthesizer synthesizer = new SlovakSynthesizer();
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String getName() {
+    return "Slovak";
+  }
+
+  public String getShortName() {
+    return "sk";
+  }
+
+  /** @return the shared country-code array itself, not a copy */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  @Override
+  public Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  /** @return a freshly allocated array holding the single maintainer */
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Zdenko Podobný");
+    maintainer.setUrl("http://sk-spell.sk.cx");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a new, modifiable set with the ids of the rules relevant for Slovak */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE", "DOUBLE_PUNCTUATION", "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START", "WORD_REPEAT_RULE", "WHITESPACE_RULE",
+      "SK_COMPOUNDS", "SK_VES" // specific to Slovak
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java
new file mode 100644
index 0000000..cc945f3
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java
@@ -0,0 +1,75 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/** Slovenian language definition; only an SRX sentence tokenizer is configured. */
+public class Slovenian extends Language {
+
+  private static final String[] COUNTRIES = {"SI"};
+
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("sl");
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String getName() {
+    return "Slovenian";
+  }
+
+  public String getShortName() {
+    return "sl";
+  }
+
+  /** @return the shared country-code array itself, not a copy */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  /** @return a freshly allocated array holding the single maintainer */
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Martin Srebotnjak")};
+  }
+
+  /** @return a new, modifiable set of relevant rule ids; none are specific to Slovenian */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE", "DOUBLE_PUNCTUATION", "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START", "WORD_REPEAT_RULE", "WHITESPACE_RULE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java
new file mode 100644
index 0000000..ba646d6
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java
@@ -0,0 +1,94 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.es.SpanishSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.es.SpanishTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+public class Spanish extends Language {
+
+  private static final String[] COUNTRIES = {
+    "ES", "", "MX", "GT", "CR", "PA", "DO", "VE", "PE", "AR", "EC",
+    "CL", "UY", "PY", "BO", "SV", "HN", "NI", "PR", "US", "CU"
+  };
+
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("es");
+  private final Synthesizer synthesizer = new SpanishSynthesizer();
+  private final Tagger tagger = new SpanishTagger();
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String getName() {
+    return "Spanish";
+  }
+
+  public String getShortName() {
+    return "es";
+  }
+
+  /** @return the shared country-code array itself, not a copy */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  public final Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  /** @return a freshly allocated array holding the single maintainer */
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Juan Martorell");
+    maintainer.setUrl("http://languagetool-es.blogspot.com/");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a new, modifiable set with the ids of the rules relevant for Spanish */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE", "DOUBLE_PUNCTUATION", "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START", "WORD_REPEAT_RULE", "WHITESPACE_RULE"
+      // "EL_WITH_FEM" is specific to Spanish but currently disabled
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java
new file mode 100644
index 0000000..1b99f9a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java
@@ -0,0 +1,75 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.sv.SwedishTagger;
+
+public class Swedish extends Language {
+
+  private static final String[] COUNTRIES = {"SE", "FI"};
+
+  private final Tagger tagger = new SwedishTagger();
+
+  public final Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public final String getName() {
+    return "Swedish";
+  }
+
+  public final String getShortName() {
+    return "sv";
+  }
+
+  /** @return the shared country-code array itself, not a copy */
+  @Override
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  /** @return a freshly allocated array holding the single maintainer */
+  public final Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Niklas Johansson")};
+  }
+
+  /** @return a new, modifiable set with the ids of the rules relevant for Swedish */
+  public final Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE", "DOUBLE_PUNCTUATION", "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START", "WORD_REPEAT_RULE", "WHITESPACE_RULE",
+      "SV_COMPOUNDS" // specific to Swedish
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java
new file mode 100644
index 0000000..c426100
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java
@@ -0,0 +1,73 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.uk.UkrainianTagger;
+
+public class Ukrainian extends Language {
+
+  private static final String[] COUNTRIES = {"UA"};
+  private final Tagger tagger = new UkrainianTagger();
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String getName() {
+    return "Ukrainian";
+  }
+
+  public String getShortName() {
+    return "uk";
+  }
+
+  /** @return the shared country-code array itself, not a copy */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  /** @return a freshly allocated array holding the single maintainer */
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Andriy Rysin")};
+  }
+
+  /** @return a new, modifiable set with the ids of the rules relevant for Ukrainian */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE", "DOUBLE_PUNCTUATION", "UPPERCASE_SENTENCE_START",
+      "WHITESPACE_RULE", "UK_SIMPLE_REPLACE" // UK_SIMPLE_REPLACE is specific to Ukrainian
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+}