diff options
author | Arno Teigseth <arno@teigseth.no> | 2011-02-05 08:48:27 +0000 |
---|---|---|
committer | Arno Teigseth <arno@teigseth.no> | 2011-02-05 08:48:27 +0000 |
commit | 4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce (patch) | |
tree | 7af736540eca93034428a975bd850e709fbbe2e5 /JLanguageTool/src/java/de/danielnaber/languagetool/Language.java | |
parent | ecaee85ab5984ebadd56721c295dc26b3335f7ce (diff) | |
download | grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.gz grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.bz2 grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.xz |
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/Language.java')
-rw-r--r-- | JLanguageTool/src/java/de/danielnaber/languagetool/Language.java | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/Language.java b/JLanguageTool/src/java/de/danielnaber/languagetool/Language.java new file mode 100644 index 0000000..a565058 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/Language.java @@ -0,0 +1,336 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package de.danielnaber.languagetool; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.MissingResourceException; +import java.util.ResourceBundle; +import java.util.Set; + +import de.danielnaber.languagetool.language.Belarusian; +import de.danielnaber.languagetool.language.Catalan; +import de.danielnaber.languagetool.language.Contributor; +import de.danielnaber.languagetool.language.Demo; +import de.danielnaber.languagetool.language.Danish; +import de.danielnaber.languagetool.language.Bokmal; +import de.danielnaber.languagetool.language.Dutch; +import de.danielnaber.languagetool.language.English; +import de.danielnaber.languagetool.language.Esperanto; +import de.danielnaber.languagetool.language.French; +import de.danielnaber.languagetool.language.Galician; +import de.danielnaber.languagetool.language.German; +import de.danielnaber.languagetool.language.Icelandic; +import de.danielnaber.languagetool.language.Italian; +import de.danielnaber.languagetool.language.Lithuanian; +import de.danielnaber.languagetool.language.Malayalam; +import de.danielnaber.languagetool.language.Polish; +import de.danielnaber.languagetool.language.Romanian; +import de.danielnaber.languagetool.language.Russian; +import de.danielnaber.languagetool.language.Slovak; +import de.danielnaber.languagetool.language.Slovenian; +import de.danielnaber.languagetool.language.Spanish; +import de.danielnaber.languagetool.language.Swedish; +import de.danielnaber.languagetool.language.Ukrainian; +import de.danielnaber.languagetool.rules.patterns.Unifier; +import de.danielnaber.languagetool.synthesis.Synthesizer; +import de.danielnaber.languagetool.tagging.Tagger; +import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator; +import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator; +import de.danielnaber.languagetool.tagging.xx.DemoTagger; +import de.danielnaber.languagetool.tokenizers.SentenceTokenizer; +import de.danielnaber.languagetool.tokenizers.Tokenizer; +import de.danielnaber.languagetool.tokenizers.WordTokenizer; +import de.danielnaber.languagetool.tools.StringTools; + +/** + * Base class for any supported language (English, German, etc). + * + * @author Daniel Naber + */ +public abstract class Language { + + // NOTE: keep in sync with array below! + //public final static Language CZECH = new Czech(); + public static final Language DANISH = new Danish(); + public static final Language BOKMAL = new Bokmal(); + public static final Language DUTCH = new Dutch(); + public static final Language ENGLISH = new English(); + public static final Language ESPERANTO = new Esperanto(); + public static final Language FRENCH = new French(); + public static final Language GERMAN = new German(); + public static final Language ITALIAN = new Italian(); + public static final Language LITHUANIAN = new Lithuanian(); + public static final Language POLISH = new Polish(); + public static final Language SLOVAK = new Slovak(); + public static final Language SLOVENIAN = new Slovenian(); + public static final Language SPANISH = new Spanish(); + public static final Language SWEDISH = new Swedish(); + public static final Language UKRAINIAN = new Ukrainian(); + public static final Language RUSSIAN = new Russian(); + public static final Language ROMANIAN = new Romanian(); + public static final Language ICELANDIC = new Icelandic(); + public static final Language GALICIAN = new Galician(); + public static final Language CATALAN = new Catalan(); + public static final Language MALAYALAM = new Malayalam(); + public static final Language BELARUSIAN = new Belarusian(); + + public static final Language DEMO = new Demo(); + + private static List<Language> externalLanguages = new ArrayList<Language>(); + + /** + * All languages supported by LanguageTool. + */ + public static Language[] LANGUAGES = { + ENGLISH, GERMAN, POLISH, FRENCH, SPANISH, ITALIAN, DUTCH, LITHUANIAN, UKRAINIAN, RUSSIAN, + SLOVAK, SLOVENIAN, SWEDISH, ROMANIAN, ICELANDIC, GALICIAN, CATALAN, DANISH, + MALAYALAM, BELARUSIAN, ESPERANTO, BOKMAL, + DEMO + // FIXME: load dynamically from classpath + }; + + /** + * All languages supported by LanguageTool, but without the demo language. + */ + public static Language[] REAL_LANGUAGES = new Language[LANGUAGES.length-1]; + static { + int i = 0; + for (final Language lang : LANGUAGES) { + if (lang != DEMO) { + REAL_LANGUAGES[i] = lang; + i++; + } + } + } + + private static final Language[] BUILTIN_LANGUAGES = LANGUAGES; + + private static final Disambiguator DEMO_DISAMBIGUATOR = new DemoDisambiguator(); + private static final Tagger DEMO_TAGGER = new DemoTagger(); + private static final SentenceTokenizer SENTENCE_TOKENIZER = new SentenceTokenizer(); + private static final WordTokenizer WORD_TOKENIZER = new WordTokenizer(); + private static final Unifier MATCH_UNIFIER = new Unifier(); + + // ------------------------------------------------------------------------- + + /** + * Get this language's two character code, e.g. <code>en</code> for English. + * @return String - language code + */ + public abstract String getShortName(); + + /** + * Get this language's name in English, e.g. <code>English</code> or <code>German</code>. + * @return String - language name + */ + public abstract String getName(); + + /** + * Get this language's variants, e.g. <code>US</code> (as in <code>en_US</code>) or + * <code>PL</code> (as in <code>pl_PL</code>). + * @return String[] - array of country variants for the language. + */ + public abstract String[] getCountryVariants(); + + /** + * Get this language's Java locale. + */ + public abstract Locale getLocale(); + + /** + * Get the name(s) of the maintainer(s) for this language or <code>null</code>. + */ + public abstract Contributor[] getMaintainers(); + + /** + * Get the IDs of the global rules that should run for texts in this language + * or <code>null</code>. + */ + public abstract Set<String> getRelevantRuleIDs(); + + // ------------------------------------------------------------------------- + + /** + * Get the location of the rule file. + */ + public String getRuleFileName() { + return JLanguageTool.getDataBroker().getRulesDir() + "/" + getShortName() + "/" + JLanguageTool.PATTERN_FILE; + } + + /** + * Get this language's part-of-speech disambiguator implementation. + */ + public Disambiguator getDisambiguator() { + return DEMO_DISAMBIGUATOR; + } + + /** + * Get this language's part-of-speech tagger implementation. + */ + public Tagger getTagger() { + return DEMO_TAGGER; + } + + /** + * Get this language's sentence tokenizer implementation. + */ + public SentenceTokenizer getSentenceTokenizer() { + return SENTENCE_TOKENIZER; + } + + /** + * Get this language's word tokenizer implementation. + */ + public Tokenizer getWordTokenizer() { + return WORD_TOKENIZER; + } + + /** + * Get this language's part-of-speech synthesizer implementation or <code>null</code>. + */ + public Synthesizer getSynthesizer() { + return null; + } + + /** + * Get this language's feature unifier. + * @return Feature unifier for analyzed tokens. + */ + public Unifier getUnifier() { + return MATCH_UNIFIER; + } + + /** + * Get this language's feature unifier used for disambiguation. + * Note: it might be different from the normal rule unifier. + * @return Feature unifier for analyzed tokens. + */ + public Unifier getDisambiguationUnifier() { + return MATCH_UNIFIER; + } + + /** + * Get the name of the language translated to the current locale, + * if available. Otherwise, get the untranslated name. + */ + public final String getTranslatedName(final ResourceBundle messages) { + try { + return messages.getString(getShortName()); + } catch (final MissingResourceException e) { + return getName(); + } + } + + // ------------------------------------------------------------------------- + + /** + * Re-inits the built-in languages and adds the specified ones. + */ + public static void reInit(final List<Language> languages) { + LANGUAGES = new Language[BUILTIN_LANGUAGES.length + languages.size()]; + int i = BUILTIN_LANGUAGES.length; + System.arraycopy(BUILTIN_LANGUAGES, 0, + LANGUAGES, 0, BUILTIN_LANGUAGES.length); + for (final Language lang : languages) { + LANGUAGES[i++] = lang; + } + externalLanguages = languages; + } + + /** + * Return languages that are not built-in but have been added manually. + */ + public static List<Language> getExternalLanguages() { + return externalLanguages; + } + + /** + * Get the Language object for the given short language name. + * + * @param shortLanguageCode e.g. <code>en</code> or <code>de</code> + * @return a Language object or <code>null</code> + */ + public static Language getLanguageForShortName(final String shortLanguageCode) { + StringTools.assureSet(shortLanguageCode, "shortLanguageCode"); + if (shortLanguageCode.length() != "xx".length()) { + throw new IllegalArgumentException("'" + shortLanguageCode + "' isn't a two-character code"); + } + for (Language element : Language.LANGUAGES) { + if (shortLanguageCode.equals(element.getShortName())) { + return element; + } + } + return null; + } + + /** + * Get the Language object for the given language name. + * + * @param languageName e.g. <code>English</code> or <code>German</code> (case is significant) + * @return a Language object or <code>null</code> + */ + public static Language getLanguageForName(final String languageName) { + for (Language element : Language.LANGUAGES) { + if (languageName.equals(element.getName())) { + return element; + } + } + return null; + } + + @Override + public final String toString() { + return getName(); + } + + /** + * Get sorted info about all maintainers. + * @since 0.9.9 + * @param messages + * {{@link ResourceBundle} language bundle to translate + * the info + * @return + * A sorted list of maintainers. + */ + public static String getAllMaintainers(final ResourceBundle messages) { + final StringBuilder maintainersInfo = new StringBuilder(); + final List<String> toSort = new ArrayList<String>(); + for (final Language lang : Language.LANGUAGES) { + if (lang != Language.DEMO) { + if (lang.getMaintainers() != null) { + final List<String> names = new ArrayList<String>(); + for (Contributor contributor : lang.getMaintainers()) { + names.add(contributor.getName()); + } + toSort.add(messages.getString(lang.getShortName()) + + ": " + StringTools.listToString(names, ", ")); + } + } + } + Collections.sort(toSort); + for (final String lElem : toSort) { + maintainersInfo.append(lElem); + maintainersInfo.append('\n'); + } + return maintainersInfo.toString(); + } +} |