diff options
author | Arno Teigseth <arno@teigseth.no> | 2011-02-05 08:48:27 +0000 |
---|---|---|
committer | Arno Teigseth <arno@teigseth.no> | 2011-02-05 08:48:27 +0000 |
commit | 4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce (patch) | |
tree | 7af736540eca93034428a975bd850e709fbbe2e5 /JLanguageTool/src/java/de/danielnaber/languagetool/language | |
parent | ecaee85ab5984ebadd56721c295dc26b3335f7ce (diff) | |
download | grammar-norwegian-master.tar.gz grammar-norwegian-master.tar.bz2 grammar-norwegian-master.tar.xz |
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/language')
27 files changed, 2268 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java new file mode 100644 index 0000000..fb1df60 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java @@ -0,0 +1,72 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.be.BelarusianTagger; + +/** + * Belarusian language declarations. 
+ * + * Copyright (C) 2010 Alex Buloichik (alex73mail@gmail.com) + */ +public class Belarusian extends Language { + + private static final String[] COUNTRIES = { "BY" }; + + private final Tagger tagger = new BelarusianTagger(); + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Belarusian"; + } + + public String getShortName() { + return "be"; + } + + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public Contributor[] getMaintainers() { + return new Contributor[] { new Contributor("Alex Buloichik") }; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WHITESPACE_RULE"); + return ids; + } +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java new file mode 100644 index 0000000..77d79ae --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java @@ -0,0 +1,104 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.synthesis.Synthesizer; +/*import de.danielnaber.languagetool.synthesis.en.EnglishSynthesizer; */ +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +/*import de.danielnaber.languagetool.tagging.disambiguation.rules.en.EnglishRuleDisambiguator;*/ +/*import de.danielnaber.languagetool.tagging.en.EnglishTagger;*/ +import de.danielnaber.languagetool.tagging.nb.BokmalTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; +/*import de.danielnaber.languagetool.tokenizers.en.EnglishWordTokenizer;*/ + +public class Bokmal extends Language { + + private final Tagger tagger = new BokmalTagger(); +// private final Tokenizer wordTokenizer = new BokmalWordTokenizer(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("nb"); +// private final Synthesizer synthesizer = new BokmalSynthesizer(); +// private final Disambiguator disambiguator = new BokmalRuleDisambiguator(); + + private static final String[] COUNTRIES = {"NO"}; + + public final Locale getLocale() { + return new Locale(getShortName()); + } + + public final SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public final String getName() { + return "Bokmal"; + } + + public final String getShortName() { + return "nb"; + } + + public final String[] getCountryVariants() { + return COUNTRIES; + } + + public final Tagger 
getTagger() { + return tagger; + } + +/* public final Tokenizer getWordTokenizer() { + return wordTokenizer; + } + + public final Synthesizer getSynthesizer() { + return synthesizer; + } + + public final Disambiguator getDisambiguator() { + return disambiguator; + }*/ + + + public final Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Arno Teigseth")/*, + new Contributor("Arno Teigseth")*/}; + } + + public final Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("EN_UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to English: +// ids.add("EN_A_VS_AN"); +// ids.add("EN_COMPOUNDS"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java new file mode 100644 index 0000000..4e0eb67 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java @@ -0,0 +1,91 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.synthesis.ca.CatalanSynthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.ca.CatalanTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.rules.ca.CastellanismesReplaceRule; +import de.danielnaber.languagetool.rules.ca.AccentuacioReplaceRule; + +public class Catalan extends Language { + + private final Tagger tagger = new CatalanTagger(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ca"); + private final Synthesizer synthesizer = new CatalanSynthesizer(); +// private CastellanismesReplaceRule castella = new CastellanismesReplaceRule(); + + private static final String[] COUNTRIES = { + "ES" + }; + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Catalan"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public String getShortName() { + return "ca"; + } + + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Ricard Roca")}; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WHITESPACE_RULE"); + 
ids.add(CastellanismesReplaceRule.CATALAN_CASTELLANISMES_REPLACE_RULE); + ids.add(AccentuacioReplaceRule.CATALAN_ACCENTUACIO_REPLACE_RULE); + return ids; + } + + public final Tagger getTagger() { + return tagger; + } + + public final Synthesizer getSynthesizer() { + return synthesizer; + } + + public final SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java new file mode 100644 index 0000000..e38d635 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java @@ -0,0 +1,63 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +/** + * A person that contributed rules or code to LanguageTool. 
+ * + * @author Daniel Naber + */ +public class Contributor { + + private final String name; + private String remark; + private String url; + + Contributor(String name) { + if (name == null) { + throw new NullPointerException("name cannot be null"); + } + this.name = name; + } + + public String getName() { + return name; + } + + public final String toString() { + return getName(); + } + + public String getRemark() { + return remark; + } + + public void setRemark(final String remark) { + this.remark = remark; + } + + public String getUrl() { + return url; + } + + public void setUrl(final String url) { + this.url = url; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java new file mode 100644 index 0000000..d3154d7 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java @@ -0,0 +1,73 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.cs.CzechTagger; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.cs.CzechSentenceTokenizer; + +public class Czech extends Language { + + private final Tagger tagger = new CzechTagger(); + private final SentenceTokenizer sentenceTokenizer = new CzechSentenceTokenizer(); + + private static final String[] COUNTRIES = {"CZ"}; + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Czech"; + } + + public String getShortName() { + return "cs"; + } + + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Jozef Ličko")}; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java new file mode 100644 index 0000000..d114c40 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java @@ -0,0 +1,78 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber 
(http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.da.DanishTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; + +public class Danish extends Language { + + private final Tagger tagger = new DanishTagger(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("da"); + + private static final String[] COUNTRIES = {"DK"}; + + public final Locale getLocale() { + return new Locale(getShortName()); + } + + public final String getName() { + return "Danish"; + } + + public final String getShortName() { + return "da"; + } + + public final String[] getCountryVariants() { + return COUNTRIES; + } + + public final Tagger getTagger() { + return tagger; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public final Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Esben Aaberg")}; + } + + public final 
Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); // correction for genitive apostrophes eg. "Lis' hund" made in UnpairedQuotesBracketsRule + ids.add("UPPERCASE_SENTENCE_START"); // abbreviation exceptions, done in DanishSentenceTokenizer + // "WORD_REPEAT_RULE" implemented in grammar.xml + ids.add("WHITESPACE_RULE"); + // specific to Danish: + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java new file mode 100644 index 0000000..ab4284b --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java @@ -0,0 +1,60 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.xx.DemoTagger; + +public class Demo extends Language { + + private final Tagger tagger = new DemoTagger(); + + public Locale getLocale() { + return new Locale("en"); + } + + public String getName() { + return "Testlanguage"; + } + + public String getShortName() { + return "xx"; + } + + public String[] getCountryVariants() { + return new String[] {"XX"}; + } + + public Tagger getTagger() { + return tagger; + } + + public Contributor[] getMaintainers() { + return null; + } + + public Set<String> getRelevantRuleIDs() { + return null; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java new file mode 100644 index 0000000..0670736 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java @@ -0,0 +1,99 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.synthesis.nl.DutchSynthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.rules.nl.DutchRuleDisambiguator; +import de.danielnaber.languagetool.tagging.nl.DutchTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; +import de.danielnaber.languagetool.tokenizers.nl.DutchWordTokenizer; + +public class Dutch extends Language { + + private final Tagger tagger = new DutchTagger(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("nl"); + private final Synthesizer synthesizer = new DutchSynthesizer(); + private final Disambiguator disambiguator = new DutchRuleDisambiguator(); + private final Tokenizer wdTokenizer = new DutchWordTokenizer(); + + private static final String[] COUNTRIES = { "NL", "BE" }; + + public final Locale getLocale() { + return new Locale(getShortName()); + } + + public final String getName() { + return "Dutch"; + } + + public final String getShortName() { + return "nl"; + } + + public final String[] getCountryVariants() { + return COUNTRIES; + } + + public final Tagger getTagger() { + return tagger; + } + + public final Synthesizer getSynthesizer() { + return synthesizer; + } + + public final SentenceTokenizer getSentenceTokenizer() { + return 
sentenceTokenizer; + } + + public final Tokenizer getWordTokenizer() { + return wdTokenizer; + } + + public final Disambiguator getDisambiguator() { + return disambiguator; + } + + public final Contributor[] getMaintainers() { + final Contributor contributor = new Contributor("Ruud Baars"); + contributor.setUrl("http://www.opentaal.org"); + return new Contributor[] { contributor }; + } + + public final Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WHITESPACE_RULE"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java new file mode 100644 index 0000000..0bf16e8 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java @@ -0,0 +1,103 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.synthesis.en.EnglishSynthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.rules.en.EnglishRuleDisambiguator; +import de.danielnaber.languagetool.tagging.en.EnglishTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; +import de.danielnaber.languagetool.tokenizers.en.EnglishWordTokenizer; + +public class English extends Language { + + private final Tagger tagger = new EnglishTagger(); + private final Tokenizer wordTokenizer = new EnglishWordTokenizer(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("en"); + private final Synthesizer synthesizer = new EnglishSynthesizer(); + private final Disambiguator disambiguator = new EnglishRuleDisambiguator(); + + private static final String[] COUNTRIES = {"GB", "US", "AU", "CA", "NZ", "ZA" }; + + public final Locale getLocale() { + return new Locale(getShortName()); + } + + public final SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public final String getName() { + return "English"; + } + + public final String getShortName() { + return "en"; + } + + public final String[] getCountryVariants() { + return COUNTRIES; + } + + public final Tagger getTagger() { + return tagger; + } + + public 
final Tokenizer getWordTokenizer() { + return wordTokenizer; + } + + public final Synthesizer getSynthesizer() { + return synthesizer; + } + + public final Disambiguator getDisambiguator() { + return disambiguator; + } + + + public final Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Marcin Miłkowski"), + new Contributor("Daniel Naber")}; + } + + public final Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("EN_UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to English: + ids.add("EN_A_VS_AN"); + ids.add("EN_COMPOUNDS"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java new file mode 100644 index 0000000..0e48d98 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java @@ -0,0 +1,72 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.eo.EsperantoTagger; + +public class Esperanto extends Language { + + private final Tagger tagger = new EsperantoTagger(); + + public Locale getLocale() { + return new Locale("eo"); + } + + public String getName() { + return "Esperanto"; + } + + public String getShortName() { + return "eo"; + } + + public String[] getCountryVariants() { + /* return "ANY" country code as a "country-less" placeholder for OOo: */ + return new String[] {"ANY"}; + } + + public Tagger getTagger() { + return tagger; + } + + public Contributor[] getMaintainers() { + return new Contributor[] { + new Contributor("Dominique Pellé") + }; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + ids.add("FRENCH_WHITESPACE"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java new file mode 100644 index 0000000..96dc5fc --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java @@ -0,0 +1,90 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the 
terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.patterns.Unifier; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.rules.fr.FrenchRuleDisambiguator; +import de.danielnaber.languagetool.tagging.fr.FrenchTagger; + +public class French extends Language { + + private final Tagger tagger = new FrenchTagger(); + private final Disambiguator disambiguator = new FrenchRuleDisambiguator(); + private static final Unifier FRENCH_UNIFIER = new Unifier(); + + private static final String[] COUNTRIES = {"FR", "", "BE", "CH", "CA", + "LU", "MC", "CM", "CI", "HI", "ML", "SN", "CD", "MA", "RE" + }; + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "French"; + } + + public String getShortName() { + return "fr"; + } + + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public Disambiguator getDisambiguator() { + return disambiguator; + } + + public Unifier getUnifier() { + return FRENCH_UNIFIER; + } + + 
public Contributor[] getMaintainers() { + final Contributor hVoisard = new Contributor("Hugo Voisard"); + hVoisard.setRemark("2006-2007"); + return new Contributor[] { + new Contributor("Agnes Souque"), + hVoisard + }; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WHITESPACE_RULE"); + ids.add("FRENCH_WHITESPACE"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java new file mode 100644 index 0000000..abd2158 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java @@ -0,0 +1,86 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.gl.GalicianTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; +import de.danielnaber.languagetool.tokenizers.gl.GalicianWordTokenizer; + +public class Galician extends Language { + + private final Tagger tagger = new GalicianTagger(); + private final Tokenizer wordTokenizer = new GalicianWordTokenizer(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("gl"); + + private static final String[] COUNTRIES = {"ES"}; + + public final Locale getLocale() { + return new Locale(getShortName()); + } + + public final SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public final String getName() { + return "Galician"; + } + + public final String getShortName() { + return "gl"; + } + + public final String[] getCountryVariants() { + return COUNTRIES; + } + + public final Tagger getTagger() { + return tagger; + } + + public final Tokenizer getWordTokenizer() { + return wordTokenizer; + } + + public Contributor[] getMaintainers() { + final Contributor contributor = new Contributor("Susana Sotelo Docío"); + contributor.setUrl("http://www.g11n.net/languagetool-gl"); + return new Contributor[] { contributor }; + } + + public final Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); 
+ ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java new file mode 100644 index 0000000..2df4cd4 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java @@ -0,0 +1,87 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.de.GermanTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; + +public class German extends Language { + + private final Tagger tagger = new GermanTagger(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("de"); + + private static final String[] COUNTRIES = { + "DE", "CH", "AT", "LU", "LI", "BE" + }; + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "German"; + } + + public String getShortName() { + return "de"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Daniel Naber")}; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("GERMAN_WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to German: + ids.add("DE_AGREEMENT"); + ids.add("DE_CASE"); + ids.add("DE_COMPOUNDS"); + ids.add("DE_DASH"); + ids.add("DE_WORD_COHERENCY"); + ids.add("DE_WIEDER_VS_WIDER"); + return ids; + } + +} diff --git 
a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java new file mode 100644 index 0000000..e48fb6a --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java @@ -0,0 +1,86 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool.language; + +/** + * @author Anton Karl Ingason + */ + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.xx.DemoTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; + +public class Icelandic extends Language { + + private final Tagger tagger = new DemoTagger(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("is"); + private static final String[] COUNTRIES = { "IS" }; + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + @Override + public Locale 
getLocale() { + return new Locale(getShortName()); + } + + @Override + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Anton Karl Ingason")}; + } + + @Override + public String getName() { + return "Icelandic"; + } + + public Tagger getTagger() { + return tagger; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + @Override + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + return ids; + } + + @Override + public String getShortName() { + return "is"; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java new file mode 100644 index 0000000..986b7f5 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java @@ -0,0 +1,74 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.it.ItalianTagger; + +public class Italian extends Language { + + private static final String[] COUNTRIES = { + "IT", "CH" + }; + + private final Tagger tagger = new ItalianTagger(); + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Italian"; + } + + public String getShortName() { + return "it"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public Contributor[] getMaintainers() { + final Contributor contributor = new Contributor("Paolo Bianchini"); + return new Contributor[] { contributor }; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java new file mode 100644 index 0000000..201a8b5 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java @@ -0,0 +1,80 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it 
and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.io.File; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; + +/** + * Create a language by specifying the language's XML rule file. + * + * @author Daniel Naber + */ +public class LanguageBuilder { + + private LanguageBuilder() { + } + + /** + * Takes an XML file named <tt>rules-xx-language.xml</tt>, + * e.g. <tt>rules-de-German.xml</tt> and builds + * a Language object for that language. 
+ */ + public static Language makeLanguage(final File file) { + if (file == null) { + throw new NullPointerException("file argument cannot be null"); + } + if (!file.getName().endsWith(".xml")) { + throw new RuleFilenameException(file); + } + final String[] parts = file.getName().split("-"); + if (parts.length != 3 || !parts[0].equals("rules") || parts[1].length() != 2) { + throw new RuleFilenameException(file); + } + + final Language newLanguage = new Language() { + public Locale getLocale() { + return new Locale(getShortName()); + } + public Contributor[] getMaintainers() { + return null; + } + public String getShortName() { + return parts[1]; + } + public String[] getCountryVariants() { + return new String[] {""}; + } + public String getName() { + return parts[2].replace(".xml", ""); + } + public Set<String> getRelevantRuleIDs() { + return null; + } + public String getRuleFileName() { + return file.getAbsolutePath(); + } + }; + return newLanguage; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java new file mode 100644 index 0000000..6401195 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java @@ -0,0 +1,70 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.xx.DemoTagger; + +public class Lithuanian extends Language { + + private static final String[] COUNTRIES = { + "LT" + }; + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Lithuanian"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public String getShortName() { + return "lt"; + } + + public Tagger getTagger() { + return new DemoTagger(); + } + + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Mantas Kriaučiūnas")}; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WHITESPACE_RULE"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java new file mode 100644 index 0000000..f15ca5c --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java @@ -0,0 +1,86 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * 
version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.ml.MalayalamTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.ml.MalayalamWordTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; + +public class Malayalam extends Language { + + private final Tagger tagger = new MalayalamTagger(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("en"); + private final Tokenizer wordTokenizer = new MalayalamWordTokenizer(); + + private static final String[] COUNTRIES = {"IN"}; + + public final Locale getLocale() { + return new Locale(getShortName()); + } + + public final SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public final String getName() { + return "Malayalam"; + } + + public final String getShortName() { + return "ml"; + } + + public final Tokenizer getWordTokenizer() { + return wordTokenizer; + } + + public final String[] getCountryVariants() { + return COUNTRIES; + } + + public final Tagger getTagger() { + return tagger; + } + + public final Contributor[] getMaintainers() { + return new 
Contributor[] {new Contributor("Jithesh.V.S") + }; + } + + public final Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to Malayalam...: + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java new file mode 100644 index 0000000..13b4faf --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java @@ -0,0 +1,116 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.patterns.Unifier; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.synthesis.pl.PolishSynthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.pl.PolishHybridDisambiguator; +import de.danielnaber.languagetool.tagging.pl.PolishTagger; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; + +public class Polish extends Language { + + private final Tagger tagger = new PolishTagger(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("pl"); + private final Disambiguator disambiguator = new PolishHybridDisambiguator(); + private final Synthesizer synthesizer = new PolishSynthesizer(); + private static final Unifier POLISH_UNIFIER = new Unifier(); + private static final Unifier POLISH_DISAMB_UNIFIER = new Unifier(); + + private static final String[] COUNTRIES = {"PL"}; + + @Override + public Locale getLocale() { + return new Locale(getShortName()); + } + + @Override + public String getName() { + return "Polish"; + } + + @Override + public String getShortName() { + return "pl"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + @Override + public Tagger getTagger() { + return tagger; + } + + @Override + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + @Override + public 
Disambiguator getDisambiguator() { + return disambiguator; + } + + public Unifier getUnifier() { + return POLISH_UNIFIER; + } + + public Unifier getDisambiguationUnifier() { + return POLISH_DISAMB_UNIFIER; + } + + @Override + public Synthesizer getSynthesizer() { + return synthesizer; + } + + @Override + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Marcin Miłkowski")}; + } + + @Override + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to Polish: + ids.add("PL_UNPAIRED_BRACKETS"); + ids.add("PL_WORD_REPEAT"); + ids.add("PL_COMPOUNDS"); + ids.add("PL_SIMPLE_REPLACE"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java new file mode 100644 index 0000000..96d6a6b --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java @@ -0,0 +1,112 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.ro.CompoundRule; +import de.danielnaber.languagetool.rules.ro.SimpleReplaceRule; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.synthesis.ro.RomanianSynthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.rules.ro.RomanianRuleDisambiguator; +import de.danielnaber.languagetool.tagging.ro.RomanianTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; +import de.danielnaber.languagetool.tokenizers.ro.RomanianWordTokenizer; + +/** + * + * @author Ionuț Păduraru + * @since 24.02.2009 22:18:21 + */ +public class Romanian extends Language { + + private static final String[] COUNTRIES = { "RO" }; + + private final Tagger tagger = new RomanianTagger(); + private final Synthesizer synthesizer = new RomanianSynthesizer(); + private final Disambiguator disambiguator = new RomanianRuleDisambiguator(); + private final Tokenizer wdTokenizer = new RomanianWordTokenizer(); + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ro"); + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Romanian"; + } + + public String getShortName() { + return "ro"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + 
+ public Tagger getTagger() { + return tagger; + } + + public Contributor[] getMaintainers() { + final Contributor contributor = new Contributor("Ionuț Păduraru"); + contributor.setUrl("http://www.archeus.ro"); + return new Contributor[] { contributor }; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WHITESPACE_RULE"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + // specific to romanian + ids.add(SimpleReplaceRule.ROMANIAN_SIMPLE_REPLACE_RULE); + ids.add(CompoundRule.ROMANIAN_COMPOUND_RULE); + + return ids; + } + + public final Synthesizer getSynthesizer() { + return synthesizer; + } + + public final Disambiguator getDisambiguator() { + return disambiguator; + } + + public final Tokenizer getWordTokenizer() { + return wdTokenizer; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java new file mode 100644 index 0000000..715bdc9 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java @@ -0,0 +1,42 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
/**
 * Thrown if external rule filename doesn't match the required format.
 *
 * @author Daniel Naber
 */
public class RuleFilenameException extends RuntimeException {

  /** Serialization version identifier. */
  private static final long serialVersionUID = 6642163394764392897L;

  /**
   * Creates the exception with a message that explains the expected naming
   * scheme and names the offending file.
   *
   * @param file the rule file whose name was rejected; only its simple name
   *        (without directories) appears in the message
   */
  public RuleFilenameException(File file) {
    super(describe(file));
  }

  /** Assembles the message text for the given rejected rule file. */
  private static String describe(final File rejected) {
    final StringBuilder text = new StringBuilder();
    text.append("Rule file must be named rules-<xx>-<lang>.xml (<xx> = language code, ");
    text.append("<lang> = language name),\n");
    text.append("for example: rules-en-English.xml\n");
    text.append("Current name: ").append(rejected.getName());
    return text.toString();
  }

}
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.rules.patterns.Unifier; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.synthesis.ru.RussianSynthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.rules.ru.RussianRuleDisambiguator; +import de.danielnaber.languagetool.tagging.ru.RussianTagger; +//import de.danielnaber.languagetool.tokenizers.Tokenizer; +//import de.danielnaber.languagetool.tokenizers.ru.RussianWordTokenizer; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; // new Tokenizer +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +// import de.danielnaber.languagetool.tokenizers.ru.RussianSentenceTokenizer; // old Tokenizer + + +public class Russian extends Language { + + private static final String[] COUNTRIES = { + "RU" + }; + + private final Tagger tagger = new RussianTagger(); + private final Disambiguator disambiguator = new RussianRuleDisambiguator(); + private static final Unifier RUSSIAN_UNIFIER = new Unifier(); +// private Tokenizer wordTokenizer = new RussianWordTokenizer(); + private final Synthesizer synthesizer = new RussianSynthesizer(); +// private SentenceTokenizer sentenceTokenizer = new RussianSentenceTokenizer(); // old Tokenizer + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ru"); // new Tokenizer + public Locale getLocale() { + return 
new Locale(getShortName()); + } + + public String getName() { + return "Russian"; + } + + public String getShortName() { + return "ru"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public Disambiguator getDisambiguator() { + return disambiguator; + } + +// public Tokenizer getWordTokenizer() { +// return wordTokenizer; +// } + + public Synthesizer getSynthesizer() { + return synthesizer; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + + public Unifier getUnifier() { + return RUSSIAN_UNIFIER; + } + + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Yakov Reztsov")}; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to Russian : + ids.add("RU_UNPAIRED_BRACKETS"); + ids.add("RU_COMPOUNDS"); + ids.add("RU_SIMPLE_REPLACE"); + return ids; + + } + +}
\ No newline at end of file diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java new file mode 100644 index 0000000..eecb54b --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java @@ -0,0 +1,93 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.sk.SlovakTagger; +import de.danielnaber.languagetool.synthesis.sk.SlovakSynthesizer; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; + +public class Slovak extends Language { + + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("sk"); + private final Tagger tagger = new SlovakTagger(); + private final Synthesizer synthesizer = new 
SlovakSynthesizer(); + + private static final String[] COUNTRIES = { + "SK" + }; + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Slovak"; + } + + public String getShortName() { + return "sk"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + @Override + public Synthesizer getSynthesizer() { + return synthesizer; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public Contributor[] getMaintainers() { + final Contributor contributor = new Contributor("Zdenko Podobný"); + contributor.setUrl("http://sk-spell.sk.cx"); + return new Contributor[] { contributor }; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to Slovak: + ids.add("SK_COMPOUNDS"); + ids.add("SK_VES"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java new file mode 100644 index 0000000..cc945f3 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java @@ -0,0 +1,75 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; + +public class Slovenian extends Language { + + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("sl"); + + private static final String[] COUNTRIES = { + "SI" + }; + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Slovenian"; + } + + public String getShortName() { + return "sl"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Martin Srebotnjak")}; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to Slovenian: none + return ids; + + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java new file mode 100644 
index 0000000..ba646d6 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java @@ -0,0 +1,94 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.synthesis.es.SpanishSynthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.es.SpanishTagger; +import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; + +public class Spanish extends Language { + + private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("es"); + private final Synthesizer synthesizer = new SpanishSynthesizer(); + + private static final String[] COUNTRIES = { + "ES", "", "MX", "GT", "CR", "PA", "DO", + "VE", "PE", "AR", "EC", "CL", "UY", "PY", + "BO", "SV", "HN", "NI", "PR", "US", "CU" + }; + + private final Tagger tagger = new SpanishTagger(); + + 
public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Spanish"; + } + + public String getShortName() { + return "es"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public final Synthesizer getSynthesizer() { + return synthesizer; + } + + public final SentenceTokenizer getSentenceTokenizer() { + return sentenceTokenizer; + } + + public Contributor[] getMaintainers() { + final Contributor contributor = new Contributor("Juan Martorell"); + contributor.setUrl("http://languagetool-es.blogspot.com/"); + return new Contributor[] { contributor }; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to Spanish: + // ids.add("EL_WITH_FEM"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java new file mode 100644 index 0000000..1b99f9a --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java @@ -0,0 +1,75 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.sv.SwedishTagger; + +public class Swedish extends Language { + + private static final String[] COUNTRIES = { + "SE", "FI" + }; + + private final Tagger tagger = new SwedishTagger(); + + public final Locale getLocale() { + return new Locale(getShortName()); + } + + public final String getName() { + return "Swedish"; + } + + public final String getShortName() { + return "sv"; + } + + @Override + public final String[] getCountryVariants() { + return COUNTRIES; + } + + public final Tagger getTagger() { + return tagger; + } + + public final Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Niklas Johansson")}; + } + + public final Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UNPAIRED_BRACKETS"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WORD_REPEAT_RULE"); + ids.add("WHITESPACE_RULE"); + // specific to Swedish: + ids.add("SV_COMPOUNDS"); + return ids; + } + +} diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java new file mode 100644 index 0000000..c426100 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java @@ -0,0 +1,73 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2007 Daniel 
Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool.language; + +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +import de.danielnaber.languagetool.Language; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.uk.UkrainianTagger; + +public class Ukrainian extends Language { + + private static final String[] COUNTRIES = { + "UA" + }; + + private final Tagger tagger = new UkrainianTagger(); + + public Locale getLocale() { + return new Locale(getShortName()); + } + + public String getName() { + return "Ukrainian"; + } + + public String getShortName() { + return "uk"; + } + + @Override + public String[] getCountryVariants() { + return COUNTRIES; + } + + public Tagger getTagger() { + return tagger; + } + + public Contributor[] getMaintainers() { + return new Contributor[] {new Contributor("Andriy Rysin")}; + } + + public Set<String> getRelevantRuleIDs() { + final Set<String> ids = new HashSet<String>(); + ids.add("COMMA_PARENTHESIS_WHITESPACE"); + ids.add("DOUBLE_PUNCTUATION"); + ids.add("UPPERCASE_SENTENCE_START"); + ids.add("WHITESPACE_RULE"); + // specific to Ukrainian: + ids.add("UK_SIMPLE_REPLACE"); + return ids; + } + +} |