summaryrefslogtreecommitdiffstats
path: root/JLanguageTool/src/java/de
diff options
context:
space:
mode:
author Arno Teigseth <arno@teigseth.no> 2011-02-05 08:48:27 +0000
committer Arno Teigseth <arno@teigseth.no> 2011-02-05 08:48:27 +0000
commit 4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce (patch)
tree 7af736540eca93034428a975bd850e709fbbe2e5 /JLanguageTool/src/java/de
parent ecaee85ab5984ebadd56721c295dc26b3335f7ce (diff)
download grammar-norwegian-master.tar.gz
grammar-norwegian-master.tar.bz2
grammar-norwegian-master.tar.xz
added more files, to complete languagetool upload (HEAD, master)
Diffstat (limited to 'JLanguageTool/src/java/de')
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java197
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedToken.java137
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java284
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/JLanguageTool.java802
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/Language.java336
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/Main.java567
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle.properties186
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_be.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ca.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_cs.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_da.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_de.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_en.properties189
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_eo.properties198
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_es.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_fr.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_gl.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_is.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_it.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_lt.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nb.properties188
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nl.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_pl.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ro.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ru.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sk.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sl.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sv.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_uk.properties93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/TextFilter.java30
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/bitext/BitextReader.java62
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/bitext/StringPair.java49
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java129
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/bitext/WordFastTMReader.java87
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/databroker/DefaultResourceDataBroker.java360
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/databroker/ResourceDataBroker.java139
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/AboutDialog.java58
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/Configuration.java233
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/ConfigurationDialog.java497
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/LanguageManagerDialog.java184
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/Main.java738
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/MainMenuBar.java170
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/MissingJdicException.java38
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/gui/Tools.java192
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java72
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java104
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java91
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java63
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java73
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java78
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java60
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java99
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java103
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java72
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java90
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java86
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java87
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java86
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java74
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java80
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java70
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java86
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java116
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java112
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java114
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java75
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java94
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java75
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java73
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/ConfigThread.java78
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/Main.java760
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/OOoAboutDialog.java64
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/SingletonFactory.java48
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractCompoundRule.java279
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractPunctuationCheckRule.java93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractSimpleReplaceRule.java159
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/Category.java85
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/CommaWhitespaceRule.java170
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/DoublePunctuationRule.java99
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRule.java314
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/IncorrectExample.java62
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/Rule.java230
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/RuleMatch.java239
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java136
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/WhitespaceRule.java91
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/WordRepeatRule.java101
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/BitextRule.java106
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/DifferentLengthRule.java93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/IncorrectBitextExample.java64
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/SameTranslationRule.java88
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRule.java90
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRule.java85
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/AgreementRule.java405
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CaseRule.java358
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CompoundRule.java53
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/DashRule.java84
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanLemmatizer.java84
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanRule.java30
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanWordRepeatRule.java39
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WiederVsWiderRule.java91
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WordCoherencyRule.java156
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/AvsAnRule.java251
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/CompoundRule.java55
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishRule.java30
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRule.java89
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/ElwithFemRule.java179
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/SpanishRule.java32
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/FrenchRule.java31
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRule.java161
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/AbstractPatternRule.java223
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Element.java803
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleLoader.java356
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Match.java551
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRule.java652
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRuleLoader.java369
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java432
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/XMLRuleHandler.java568
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRule.java93
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRuleLoader.java413
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextXMLRuleHandler.java56
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/FalseFriendsAsBitextLoader.java72
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java55
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java31
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java200
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java82
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/CompoundRule.java58
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/SimpleReplaceRule.java264
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRule.java80
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianCompoundRule.java57
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianRule.java30
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRule.java62
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/CompoundRule.java55
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakRule.java31
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakVes.java146
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/CompoundRule.java247
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/SwedishRule.java31
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/PunctuationCheckRule.java76
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/SimpleReplaceRule.java50
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/server/HTTPServer.java341
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/server/PortBindingException.java36
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/BaseSynthesizer.java87
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/Synthesizer.java58
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/SynthesizerTools.java64
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ca/CatalanSynthesizer.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizer.java99
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizer.java44
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizer.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizer.java171
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizer.java40
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ru/RussianSynthesizer.java44
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizer.java40
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/BaseTagger.java152
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ManualTagger.java127
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/Tagger.java57
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/be/BelarusianTagger.java58
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ca/CatalanTagger.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/cs/CzechTagger.java115
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/da/DanishTagger.java50
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanToken.java136
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanTokenReadings.java172
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanTagger.java201
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanToken.java104
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/Disambiguator.java50
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunker.java199
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishHybridDisambiguator.java48
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/AbstractRuleDisambiguator.java83
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambXMLRuleHandler.java52
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguatedExample.java77
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java357
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java453
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguator.java32
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguator.java32
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/nl/DutchRuleDisambiguator.java32
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/pl/PolishRuleDisambiguator.java31
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguator.java32
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ru/RussianRuleDisambiguator.java36
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/DemoDisambiguator.java38
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/TrimDisambiguator.java53
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/en/EnglishTagger.java43
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/eo/EsperantoTagger.java360
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/es/SpanishTagger.java43
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/fr/FrenchTagger.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/gl/GalicianTagger.java43
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/it/ItalianTagger.java46
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ml/MalayalamTagger.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nb/BokmalTagger.java43
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nl/DutchTagger.java41
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/pl/PolishTagger.java117
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ro/RomanianTagger.java102
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ru/RussianTagger.java42
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sk/SlovakTagger.java40
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sv/SwedishTagger.java39
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/IPOSTag.java30
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMorfoTagger.java35
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMyspellTagger.java149
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianTagger.java26
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tagging/xx/DemoTagger.java63
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizer.java99
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SentenceTokenizer.java250
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/Tokenizer.java32
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/WordTokenizer.java59
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizer.java228
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/da/DanishSentenceTokenizer.java43
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanCompoundTokenizer.java47
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizer.java95
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/en/EnglishWordTokenizer.java53
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/gl/GalicianWordTokenizer.java53
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ml/MalayalamWordTokenizer.java55
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizer.java53
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizer.java56
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore1
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java232
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java581
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java37
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java626
-rw-r--r--JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java127
219 files changed, 29932 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java
new file mode 100644
index 0000000..6c50282
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java
@@ -0,0 +1,197 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.danielnaber.languagetool.tagging.de.AnalyzedGermanTokenReadings;
+
+/**
+ * A sentence that has been tokenized and analyzed.
+ * <p>
+ * The whitespace-free token view is built lazily and takes part in
+ * {@link #equals(Object)} and {@link #hashCode()}, so both can change once
+ * {@link #getTokensWithoutWhitespace()} has been called for the first time.
+ *
+ * @author Daniel Naber
+ */
+public class AnalyzedSentence {
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + Arrays.hashCode(nonBlankTokens);
+    result = prime * result + Arrays.hashCode(tokens);
+    result = prime * result + Arrays.hashCode(whPositions);
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    final AnalyzedSentence other = (AnalyzedSentence) obj;
+    return Arrays.equals(nonBlankTokens, other.nonBlankTokens)
+        && Arrays.equals(tokens, other.tokens)
+        && Arrays.equals(whPositions, other.whPositions);
+  }
+
+  private AnalyzedTokenReadings[] tokens;
+
+  private AnalyzedTokenReadings[] nonBlankTokens;
+
+  /**
+   * Array mapping positions of tokens as returned with
+   * getTokensWithoutWhitespace() to the internal tokens array.
+   */
+  private int[] whPositions;
+
+  /**
+   * Sets {@link AnalyzedTokenReadings}. Whitespace is also a token.
+   */
+  public AnalyzedSentence(final AnalyzedTokenReadings[] tokens) {
+    this.tokens = tokens;
+  }
+
+  /**
+   * Stores the tokens and eagerly builds the whitespace-free view. NOTE: that
+   * step recomputes the mapping and so replaces the given whPositions.
+   */
+  public AnalyzedSentence(final AnalyzedTokenReadings[] tokens, final
+      int[] whPositions) {
+    this.tokens = tokens;
+    this.setWhPositions(whPositions);
+    getTokensWithoutWhitespace();
+  }
+
+  /**
+   * Returns the {@link AnalyzedTokenReadings} of the analyzed text. Whitespace
+   * is also a token. The internal array is returned, not a copy.
+   */
+  public final AnalyzedTokenReadings[] getTokens() {
+    return tokens;
+  }
+
+  /**
+   * Returns the {@link AnalyzedTokenReadings} of the analyzed text, with
+   * whitespace tokens removed but with the artificial <code>SENT_START</code>
+   * token included. Built on first use; each call returns a fresh copy.
+   */
+  public final AnalyzedTokenReadings[] getTokensWithoutWhitespace() {
+    if (nonBlankTokens == null) {
+      int whCounter = 0;
+      int nonWhCounter = 0;
+      final int[] mapping = new int[tokens.length + 1];
+      final List<AnalyzedTokenReadings> l = new ArrayList<AnalyzedTokenReadings>();
+      for (final AnalyzedTokenReadings token : tokens) {
+        if (!token.isWhitespace() || token.isSentStart() || token.isSentEnd()
+            || token.isParaEnd()) {
+          l.add(token);
+          mapping[nonWhCounter] = whCounter;
+          nonWhCounter++;
+        }
+        whCounter++;
+      }
+      setNonBlankTokens(l.toArray(new AnalyzedTokenReadings[l.size()]));
+      setWhPositions(mapping.clone());
+    }
+    return nonBlankTokens.clone();
+  }
+
+  /**
+   * Get a position of a non-whitespace token in the original sentence with
+   * whitespace.
+   *
+   * @param nonWhPosition
+   *          Position of a non-whitespace token
+   * @return int position in the original sentence.
+   */
+  public final int getOriginalPosition(final int nonWhPosition) {
+    if (nonBlankTokens == null) {
+      getTokensWithoutWhitespace();
+    }
+    return getWhPositions()[nonWhPosition];
+  }
+
+  /**
+   * @return all tokens, each non-whitespace one followed by its readings in []
+   */
+  @Override
+  public final String toString() {
+    final StringBuilder sb = new StringBuilder();
+    for (final AnalyzedTokenReadings element : tokens) {
+      if (!element.isWhitespace()) {
+        sb.append(element.getToken());
+        sb.append('[');
+      }
+      for (int j = 0; j < element.getReadingsLength(); j++) {
+        // Fetch the reading once; it is dereferenced here, so it must be non-null.
+        final AnalyzedToken reading = element.getAnalyzedToken(j);
+        final String posTag = reading.getPOSTag();
+        if (element.isSentStart()) {
+          sb.append("<S>");
+        } else if (JLanguageTool.SENTENCE_END_TAGNAME.equals(posTag)) {
+          sb.append("</S>");
+        } else if (JLanguageTool.PARAGRAPH_END_TAGNAME.equals(posTag)) {
+          sb.append("<P/>");
+        } else if (posTag == null
+            && !(element instanceof AnalyzedGermanTokenReadings)) {
+          // FIXME: don't depend on AnalyzedGermanTokenReadings here
+          sb.append(reading.getToken());
+        } else if (!element.isWhitespace()) {
+          sb.append(reading);
+          if (j < element.getReadingsLength() - 1) {
+            sb.append(',');
+          }
+        }
+      }
+      sb.append(element.isWhitespace() ? ' ' : ']');
+    }
+    return sb.toString();
+  }
+
+  /**
+   * @param whPositions the whPositions to set
+   */
+  public void setWhPositions(int[] whPositions) {
+    this.whPositions = whPositions;
+  }
+
+  /**
+   * @return the whPositions
+   */
+  public int[] getWhPositions() {
+    return whPositions;
+  }
+
+  /**
+   * @param nonBlankTokens the nonBlankTokens to set
+   */
+  public void setNonBlankTokens(AnalyzedTokenReadings[] nonBlankTokens) {
+    this.nonBlankTokens = nonBlankTokens;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedToken.java b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedToken.java
new file mode 100644
index 0000000..d36274a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedToken.java
@@ -0,0 +1,137 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+/**
+ * A word (or punctuation, or whitespace) and its part-of-speech tag.
+ * <p>
+ * Token, POS tag and lemma are fixed at construction; only the
+ * whitespace-before flag can be changed afterwards.
+ *
+ * @author Daniel Naber
+ */
+public class AnalyzedToken {
+
+  private final String token;
+  private final String posTag;
+  private final String lemma;
+
+  /** used only for matching with Elements */
+  private final String tokenInflected;
+
+  private boolean isWhitespaceBefore;
+
+  /**
+   * Creates a token reading.
+   *
+   * @param token the token text, must not be null
+   * @param posTag the part-of-speech tag, may be null
+   * @param lemma the lemma, may be null
+   * @throws NullPointerException if <code>token</code> is null
+   */
+  public AnalyzedToken(final String token, final String posTag, final String lemma) {
+    if (token == null) {
+      throw new NullPointerException("Token cannot be null!");
+    }
+    this.token = token;
+    this.posTag = posTag;
+    this.lemma = lemma;
+    // Without a lemma the token itself serves as the form used for matching.
+    tokenInflected = (lemma == null) ? token : lemma;
+  }
+
+  /** @return the token text, never null */
+  public final String getToken() {
+    return token;
+  }
+
+  /** @return the part-of-speech tag, possibly null */
+  public final String getPOSTag() {
+    return posTag;
+  }
+
+  /** @return the lemma, possibly null */
+  public final String getLemma() {
+    return lemma;
+  }
+
+  /** @return the lemma if one was given, otherwise the token text */
+  public final String getTokenInflected() {
+    return tokenInflected;
+  }
+
+  /** @param isWhite new value of the whitespace-before flag */
+  public final void setWhitespaceBefore(final boolean isWhite) {
+    isWhitespaceBefore = isWhite;
+  }
+
+  /** @return the current whitespace-before flag */
+  public final boolean isWhitespaceBefore() {
+    return isWhitespaceBefore;
+  }
+
+  /** @return lemma (or token if there is no lemma) + "/" + POS tag */
+  @Override
+  public String toString() {
+    return tokenInflected + '/' + posTag;
+  }
+
+  /**
+   * Hashes the same fields that {@link #equals(Object)} compares. This
+   * includes the mutable whitespace-before flag, so the value changes when
+   * that flag is changed.
+   */
+  @Override
+  public final int hashCode() {
+    // TODO: use Apache Commons Lang HashCodeBuilder
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + (isWhitespaceBefore ? 1231 : 1237);
+    result = prime * result + ((lemma == null) ? 0 : lemma.hashCode());
+    result = prime * result + ((posTag == null) ? 0 : posTag.hashCode());
+    result = prime * result + token.hashCode();
+    return result;
+  }
+
+  /**
+   * Two tokens are equal if they have the same class, token text, POS tag,
+   * lemma and whitespace-before flag.
+   */
+  @Override
+  public final boolean equals(final Object obj) {
+    // TODO: use Apache Commons Lang EqualsBuilder
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    final AnalyzedToken other = (AnalyzedToken) obj;
+    return isWhitespaceBefore == other.isWhitespaceBefore
+        && token.equals(other.token)
+        && sameOrBothNull(lemma, other.lemma)
+        && sameOrBothNull(posTag, other.posTag);
+  }
+
+  /** Null-safe string comparison. */
+  private static boolean sameOrBothNull(final String a, final String b) {
+    return (a == null) ? (b == null) : a.equals(b);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java
new file mode 100644
index 0000000..ac6dc54
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java
@@ -0,0 +1,284 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * An array of {@link AnalyzedToken}s used to store multiple POS tags and lemmas
+ * for a given single token.
+ *
+ * @author Marcin Milkowski
+ */
+public class AnalyzedTokenReadings {
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + Arrays.hashCode(anTokReadings);
+ result = prime * result + (isLinebreak ? 1231 : 1237);
+ result = prime * result + (isParaEnd ? 1231 : 1237);
+ result = prime * result + (isSentEnd ? 1231 : 1237);
+ result = prime * result + (isSentStart ? 1231 : 1237);
+ result = prime * result + (isWhitespace ? 1231 : 1237);
+ result = prime * result + (isWhitespaceBefore ? 1231 : 1237);
+ result = prime * result + startPos;
+ result = prime * result + ((token == null) ? 0 : token.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ final AnalyzedTokenReadings other = (AnalyzedTokenReadings) obj;
+ if (!Arrays.equals(anTokReadings, other.anTokReadings))
+ return false;
+ if (isLinebreak != other.isLinebreak)
+ return false;
+ if (isParaEnd != other.isParaEnd)
+ return false;
+ if (isSentEnd != other.isSentEnd)
+ return false;
+ if (isSentStart != other.isSentStart)
+ return false;
+ if (isWhitespace != other.isWhitespace)
+ return false;
+ if (isWhitespaceBefore != other.isWhitespaceBefore)
+ return false;
+ if (startPos != other.startPos)
+ return false;
+ if (token == null) {
+ if (other.token != null)
+ return false;
+ } else if (!token.equals(other.token))
+ return false;
+ return true;
+ }
+
+ protected AnalyzedToken[] anTokReadings;
+ private int startPos;
+ private String token;
+
+ private boolean isWhitespace;
+ private boolean isLinebreak;
+ private boolean isSentEnd;
+ private boolean isSentStart;
+ private boolean isParaEnd;
+
+ private boolean isWhitespaceBefore;
+
+ public AnalyzedTokenReadings(final AnalyzedToken[] r, final int startPos) {
+ anTokReadings = r.clone();
+ this.startPos = startPos;
+ init();
+ }
+
+ public AnalyzedTokenReadings(final List<AnalyzedToken> list, final int startPos) {
+ anTokReadings = list.toArray(new AnalyzedToken[list.size()]);
+ this.startPos = startPos;
+ init();
+ }
+
+ AnalyzedTokenReadings(final AnalyzedToken at) {
+ anTokReadings = new AnalyzedToken[1];
+ anTokReadings[0] = at;
+ isWhitespaceBefore = at.isWhitespaceBefore();
+ init();
+ }
+
+ public AnalyzedTokenReadings(final AnalyzedToken at, final int startPos) {
+ this(at);
+ this.startPos = startPos;
+ }
+
+ private void init() {
+ token = anTokReadings[0].getToken();
+ isWhitespace = StringTools.isWhitespace(token);
+ isLinebreak = "\n".equals(token) || "\r\n".equals(token)
+ || "\r".equals(token) || "\n\r".equals(token);
+ isSentStart = JLanguageTool.SENTENCE_START_TAGNAME.equals(anTokReadings[0]
+ .getPOSTag());
+ isParaEnd = hasPosTag(JLanguageTool.PARAGRAPH_END_TAGNAME);
+ isSentEnd = hasPosTag(JLanguageTool.SENTENCE_END_TAGNAME);
+ }
+
+ public final List<AnalyzedToken> getReadings() {
+ return Arrays.asList(anTokReadings);
+ }
+
+ /**
+ * Checks if the token has a particular POS tag.
+ *
+ * @param pos
+ * POS Tag to check
+ * @return True if it does.
+ */
+ public final boolean hasPosTag(final String pos) {
+ boolean found = false;
+ for (final AnalyzedToken reading : anTokReadings) {
+ if (reading.getPOSTag() != null) {
+ found = pos.equals(reading.getPOSTag());
+ if (found) {
+ break;
+ }
+ }
+ }
+ return found;
+ }
+
+ public final AnalyzedToken getAnalyzedToken(final int i) {
+ return anTokReadings[i];
+ }
+
+ public final void addReading(final AnalyzedToken tok) {
+ final ArrayList<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
+ for (int i = 0; i < anTokReadings.length - 1; i++) {
+ l.add(anTokReadings[i]);
+ }
+ if (anTokReadings[anTokReadings.length - 1].getPOSTag() != null) {
+ l.add(anTokReadings[anTokReadings.length - 1]);
+ }
+ tok.setWhitespaceBefore(isWhitespaceBefore);
+ l.add(tok);
+ anTokReadings = l.toArray(new AnalyzedToken[l.size()]);
+ if (tok.getToken().length() > token.length()) { //in case a longer token is added
+ token = tok.getToken();
+ }
+ anTokReadings[anTokReadings.length - 1].
+ setWhitespaceBefore(isWhitespaceBefore);
+ isParaEnd = hasPosTag(JLanguageTool.PARAGRAPH_END_TAGNAME);
+ isSentEnd = hasPosTag(JLanguageTool.SENTENCE_END_TAGNAME);
+ }
+
+ public final void removeReading(final AnalyzedToken tok) {
+ final ArrayList<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
+ final AnalyzedToken tmpTok = new AnalyzedToken(tok.getToken(), tok
+ .getPOSTag(), tok.getLemma());
+ tmpTok.setWhitespaceBefore(isWhitespaceBefore);
+ for (AnalyzedToken anTokReading : anTokReadings) {
+ if (!anTokReading.equals(tmpTok)) {
+ l.add(anTokReading);
+ }
+ }
+ anTokReadings = l.toArray(new AnalyzedToken[l.size()]);
+ }
+
+ public final int getReadingsLength() {
+ return anTokReadings.length;
+ }
+
+ public final boolean isWhitespace() {
+ return isWhitespace;
+ }
+
+ /**
+ * Returns true if the token equals \n, \r\n \n\r or \r\n.
+ */
+ public final boolean isLinebreak() {
+ return isLinebreak;
+ }
+
+ public final boolean isSentStart() {
+ return isSentStart;
+ }
+
+ /**
+ * @return true when the token is a last token in a paragraph.
+ */
+ public final boolean isParaEnd() {
+ return isParaEnd;
+ }
+
+ /**
+ * Add PARA_END tag.
+ */
+ public void setParaEnd() {
+ final AnalyzedToken paragraphEnd = new AnalyzedToken(getToken(),
+ JLanguageTool.PARAGRAPH_END_TAGNAME, getAnalyzedToken(0).getLemma());
+ addReading(paragraphEnd);
+ }
+
+ /**
+ * @return true when the token is a last token in a sentence.
+ */
+ public final boolean isSentEnd() {
+ return isSentEnd;
+ }
+
+ /**
+ * @since 0.9.9
+ * @return true if the token is OpenOffice field code.
+ */
+ public final boolean isFieldCode() {
+ return "\u0001".equals(token) || "\u0002".equals(token);
+ }
+
+ /**
+ * Add a SENT_END tag.
+ */
+ public final void setSentEnd() {
+ final AnalyzedToken sentenceEnd = new AnalyzedToken(getToken(),
+ JLanguageTool.SENTENCE_END_TAGNAME, getAnalyzedToken(0).getLemma());
+ addReading(sentenceEnd);
+ }
+
+ public final int getStartPos() {
+ return startPos;
+ }
+
+ public final void setStartPos(final int position) {
+ startPos = position;
+ }
+
+ public final String getToken() {
+ return token;
+ }
+
+ public final void setWhitespaceBefore(final boolean isWhite) {
+ isWhitespaceBefore = isWhite;
+ for (final AnalyzedToken aTok : anTokReadings) {
+ aTok.setWhitespaceBefore(isWhite);
+ }
+ }
+
+ public final boolean isWhitespaceBefore() {
+ return isWhitespaceBefore;
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder();
+ for (final AnalyzedToken element : anTokReadings) {
+ sb.append(element);
+ }
+ return sb.toString();
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/JLanguageTool.java b/JLanguageTool/src/java/de/danielnaber/languagetool/JLanguageTool.java
new file mode 100644
index 0000000..44bdfec
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/JLanguageTool.java
@@ -0,0 +1,802 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.databroker.DefaultResourceDataBroker;
+import de.danielnaber.languagetool.databroker.ResourceDataBroker;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.patterns.FalseFriendRuleLoader;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+import de.danielnaber.languagetool.rules.patterns.PatternRuleLoader;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tools.ReflectionUtils;
+
+/**
+ * The main class used for checking text against different rules:
+ * <ul>
+ * <li>the built-in rules (<i>a</i> vs. <i>an</i>, whitespace after commas, ...)
+ * <li>pattern rules loaded from external XML files with
+ * {@link #loadPatternRules(String)}
+ * <li>your own implementation of the abstract {@link Rule} classes added with
+ * {@link #addRule(Rule)}
+ * </ul>
+ *
+ * <p>
+ * Note that the constructors create a language checker that uses the built-in
+ * rules only. Other rules (e.g. from XML) need to be added explicitly.
+ *
+ * @author Daniel Naber
+ */
+@SuppressWarnings({"UnusedDeclaration"})
+public final class JLanguageTool {
+
+ public static final String VERSION = "1.3-dev"; // keep in sync with
+ // build.properties!
+
+ private static ResourceDataBroker dataBroker = new DefaultResourceDataBroker();
+ public static final String PATTERN_FILE = "grammar.xml";
+ public static final String FALSE_FRIEND_FILE = "false-friends.xml";
+
+ public static final String SENTENCE_START_TAGNAME = "SENT_START";
+ public static final String SENTENCE_END_TAGNAME = "SENT_END";
+ public static final String PARAGRAPH_END_TAGNAME = "PARA_END";
+
+ private final List<Rule> builtinRules = new ArrayList<Rule>();
+ private final List<Rule> userRules = new ArrayList<Rule>(); // rules added via addRule() method
+ private final Set<String> disabledRules = new HashSet<String>();
+ private final Set<String> enabledRules = new HashSet<String>();
+
+ private final Set<String> disabledCategories = new HashSet<String>();
+
+ private Language language;
+ private Language motherTongue;
+ private Disambiguator disambiguator;
+ private Tagger tagger;
+ private Tokenizer sentenceTokenizer;
+ private Tokenizer wordTokenizer;
+
+ private PrintStream printStream;
+
+ private int sentenceCount;
+
+ private boolean listUnknownWords;
+ private Set<String> unknownWords;
+
+ /**
+ * Selects which rules {@link #checkAnalyzedSentence} runs, depending on
+ * whether a rule reports paragraph back-tracking
+ * ({@code Rule.isParagraphBackTrack()}).
+ */
+ public static enum paragraphHandling {
+ /**
+ * Handle normally - all kinds of rules run.
+ */
+ NORMAL,
+ /**
+ * Run only paragraph-level rules, i.e. rules with paragraph back-tracking.
+ */
+ ONLYPARA,
+ /**
+ * Run only sentence-level rules, i.e. rules without paragraph back-tracking.
+ */
+ ONLYNONPARA
+ }
+
+ // just for testing:
+ /*
+ * private Rule[] allBuiltinRules = new Rule[] { new
+ * UppercaseSentenceStartRule() };
+ */
+
+ /**
+  * Creates a checker for the given language without a mother tongue; with
+  * no mother tongue {@link #loadFalseFriendRules(String)} returns no rules.
+  * Delegates to {@link #JLanguageTool(Language, Language)}.
+  *
+  * @param language the language to be used, must not be null
+  * @throws IOException declared by the delegated constructor
+  */
+ public JLanguageTool(final Language language) throws IOException {
+   this(language, null);
+ }
+
+ /**
+  * Creates a checker and registers those built-in rules that support the
+  * given language. The disambiguator, tagger and both tokenizers are taken
+  * from the language.
+  *
+  * @param language
+  *          the language to be used.
+  * @param motherTongue
+  *          the user's mother tongue or <code>null</code>. The mother tongue
+  *          may also be used as a source language for checking bilingual texts.
+  *
+  * @throws NullPointerException if <code>language</code> is null
+  * @throws IOException
+  */
+ public JLanguageTool(final Language language, final Language motherTongue)
+     throws IOException {
+   if (language == null) {
+     throw new NullPointerException("language cannot be null");
+   }
+   this.language = language;
+   this.motherTongue = motherTongue;
+   final ResourceBundle messages = getMessageBundle(language);
+   for (final Rule candidate : getAllBuiltinRules(language, messages)) {
+     if (!candidate.supportsLanguage(language)) {
+       continue;
+     }
+     builtinRules.add(candidate);
+   }
+   this.disambiguator = language.getDisambiguator();
+   this.tagger = language.getTagger();
+   this.sentenceTokenizer = language.getSentenceTokenizer();
+   this.wordTokenizer = language.getWordTokenizer();
+ }
+
+ /**
+ * The grammar checker does need resources from following
+ * directories:
+ *
+ * <ul style="list-type: circle">
+ * <li>{@code /resource}</li>
+ * <li>{@code /rules}</li>
+ * </ul>
+ *
+ * This method is thread-safe.
+ *
+ * @return The currently set data broker which allows to obtain
+ * resources from the mentioned directories above. If no
+ * data broker was set, a new {@link DefaultResourceDataBroker} will
+ * be instantiated and returned.
+ * @since 1.0.1
+ */
+ public static synchronized ResourceDataBroker getDataBroker() {
+ if (JLanguageTool.dataBroker == null) {
+ JLanguageTool.dataBroker = new DefaultResourceDataBroker();
+ }
+ return JLanguageTool.dataBroker;
+ }
+
+ /**
+  * Replaces the data broker used to obtain resources from the
+  * directories
+  *
+  * <ul style="list-style-type: circle">
+  * <li>{@code /resource}</li>
+  * <li>{@code /rules}</li>
+  * </ul>
+  *
+  * This method is synchronized on the class.
+  *
+  * @param broker The new resource broker to be used. After passing
+  * <code>null</code>, {@link #getDataBroker()} creates a default broker.
+  * @since 1.0.1
+  */
+ public static synchronized void setDataBroker(ResourceDataBroker broker) {
+   dataBroker = broker;
+ }
+
+ /**
+  * Switches the collection of unknown words during check() on or off
+  * (default: off). {@link #getUnknownWords()} only works while it is on.
+  *
+  * @param listUnknownWords <code>true</code> to store unknown words
+  */
+ public void setListUnknownWords(final boolean listUnknownWords) {
+   this.listUnknownWords = listUnknownWords;
+ }
+
+ /**
+ * Gets the ResourceBundle for the default language of the user's system.
+ */
+ public static ResourceBundle getMessageBundle() {
+ try {
+ return ResourceBundle
+ .getBundle("de.danielnaber.languagetool.MessagesBundle");
+ } catch (final MissingResourceException e) {
+ return ResourceBundle.getBundle(
+ "de.danielnaber.languagetool.MessagesBundle", Locale.ENGLISH);
+ }
+ }
+
+ /**
+ * Gets the ResourceBundle for the given user interface language.
+ */
+ private static ResourceBundle getMessageBundle(final Language lang) {
+ try {
+ return ResourceBundle.getBundle(
+ "de.danielnaber.languagetool.MessagesBundle", lang.getLocale());
+ } catch (final MissingResourceException e) {
+ return ResourceBundle.getBundle(
+ "de.danielnaber.languagetool.MessagesBundle", Locale.ENGLISH);
+ }
+ }
+
+ /**
+  * Instantiates, via reflection, all classes whose name ends in "Rule" from
+  * the package of {@link Rule} (generic rules) and from its sub-package
+  * named after the language's short name (language-specific rules).
+  *
+  * A rule class is created through its public constructor taking either a
+  * ResourceBundle or a ResourceBundle and a Language. All public
+  * constructors are inspected, so the order in which reflection reports
+  * them does not matter.
+  *
+  * @param language language whose specific rules are loaded
+  * @param messages bundle passed to the rule constructors
+  * @return the instantiated rules, generic ones first
+  * @throws RuntimeException if loading fails, e.g. because a class offers
+  *         public constructors but none with a supported signature
+  */
+ private Rule[] getAllBuiltinRules(final Language language,
+     final ResourceBundle messages) {
+   // TODO: the order of loading classes is not guaranteed so we may want to
+   // implement rule precedence
+   final List<Rule> rules = new ArrayList<Rule>();
+   try {
+     final String rulePackage = Rule.class.getPackage().getName();
+     // we pass ".*Rule$" regexp to improve efficiency, see javadoc
+     final Class[] genericClasses = ReflectionUtils.findClasses(Rule.class
+         .getClassLoader(), rulePackage, ".*Rule$", 0, Rule.class, null);
+     final Class[] languageClasses = ReflectionUtils.findClasses(Rule.class
+         .getClassLoader(), rulePackage + "." + language.getShortName(),
+         ".*Rule$", 0, Rule.class, null);
+
+     final List<Class> classes = new ArrayList<Class>();
+     classes.addAll(Arrays.asList(genericClasses));
+     classes.addAll(Arrays.asList(languageClasses));
+
+     for (final Class ruleClass : classes) {
+       final Constructor[] constructors = ruleClass.getConstructors();
+       boolean instantiated = false;
+       for (final Constructor constructor : constructors) {
+         final Class[] paramTypes = constructor.getParameterTypes();
+         if (paramTypes.length == 1
+             && paramTypes[0].equals(ResourceBundle.class)) {
+           rules.add((Rule) constructor.newInstance(messages));
+           instantiated = true;
+           break;
+         }
+         if (paramTypes.length == 2
+             && paramTypes[0].equals(ResourceBundle.class)
+             && paramTypes[1].equals(Language.class)) {
+           rules.add((Rule) constructor.newInstance(messages, language));
+           instantiated = true;
+           break;
+         }
+       }
+       // classes without any public constructor are skipped silently, as before
+       if (!instantiated && constructors.length > 0) {
+         throw new RuntimeException("Unknown constructor for rule class: "
+             + ruleClass.getName());
+       }
+     }
+   } catch (final Exception e) {
+     throw new RuntimeException("Failed to load rules for language " + language, e);
+   }
+   // System.err.println("Loaded " + rules.size() + " rules");
+   return rules.toArray(new Rule[rules.size()]);
+ }
+
+ /**
+  * Sets the stream that receives verbose output; <code>null</code> turns
+  * verbose output off.
+  *
+  * @param printStream target of verbose output or <code>null</code>
+  */
+ public void setOutput(final PrintStream printStream) {
+   this.printStream = printStream;
+ }
+
+ /**
+  * Load pattern rules from an XML file. Use {@link #addRule} to add these
+  * rules to the checking process. The name is first looked up as a class
+  * path resource and, if no such resource exists, opened as a file. The
+  * stream is closed before this method returns.
+  *
+  * @param filename resource name or file path of the rule file
+  * @throws IOException if the file cannot be opened or read
+  * @return a List of {@link PatternRule} objects
+  */
+ public List<PatternRule> loadPatternRules(final String filename)
+     throws IOException {
+   final PatternRuleLoader ruleLoader = new PatternRuleLoader();
+   InputStream is = this.getClass().getResourceAsStream(filename);
+   if (is == null) {
+     // happens for external rules plugged in as an XML file:
+     is = new FileInputStream(filename);
+   }
+   try {
+     return ruleLoader.getRules(is, filename);
+   } finally {
+     is.close();
+   }
+ }
+
+ /**
+  * Load false friend rules from an XML file. Only those pairs will be loaded
+  * that match the current text language and the mother tongue specified in the
+  * JLanguageTool constructor. Use {@link #addRule} to add these rules to the
+  * checking process. The file is looked up as a class path resource; the
+  * stream is closed before this method returns.
+  *
+  * @param filename resource name of the false friend file
+  * @throws ParserConfigurationException
+  * @throws SAXException
+  * @throws IOException
+  * @return a List of {@link PatternRule} objects, empty if no mother tongue
+  *         was given
+  */
+ public List<PatternRule> loadFalseFriendRules(final String filename)
+     throws ParserConfigurationException, SAXException, IOException {
+   if (motherTongue == null) {
+     return new ArrayList<PatternRule>();
+   }
+   final FalseFriendRuleLoader ruleLoader = new FalseFriendRuleLoader();
+   // a missing resource yields null, which is handed to the loader as before
+   final InputStream is = this.getClass().getResourceAsStream(filename);
+   try {
+     return ruleLoader.getRules(is, language, motherTongue);
+   } finally {
+     if (is != null) {
+       is.close();
+     }
+   }
+ }
+
+ /**
+  * Loads the pattern rules from the file named by the language's
+  * <code>getRuleFileName()</code> and adds them to the rules used by
+  * {@link #check}.
+  *
+  * @throws IOException
+  */
+ public void activateDefaultPatternRules() throws IOException {
+   userRules.addAll(loadPatternRules(language.getRuleFileName()));
+ }
+
+ /**
+  * Loads the false friend rules from {@link #FALSE_FRIEND_FILE} in the data
+  * broker's rules directory and adds them to the rules used by
+  * {@link #check}.
+  *
+  * @throws ParserConfigurationException
+  * @throws SAXException
+  * @throws IOException
+  */
+ public void activateDefaultFalseFriendRules()
+     throws ParserConfigurationException, SAXException, IOException {
+   final String rulesDir = JLanguageTool.getDataBroker().getRulesDir();
+   userRules.addAll(loadFalseFriendRules(rulesDir + "/" + FALSE_FRIEND_FILE));
+ }
+
+ /**
+  * Appends a rule to the user rules, which {@link #getAllRules()} returns
+  * after the built-in ones and which {@link #check} therefore uses.
+  *
+  * @param rule the rule to add
+  */
+ public void addRule(final Rule rule) {
+   this.userRules.add(rule);
+ }
+
+ /**
+  * Disable a given rule so {@link #check} won't use it. The id is stored
+  * without verifying that a rule with this id exists.
+  *
+  * @param ruleId
+  *          the id of the rule to disable
+  */
+ public void disableRule(final String ruleId) {
+   // TODO: check if such a rule exists
+   this.disabledRules.add(ruleId);
+ }
+
+ /**
+  * Disable a given category so {@link #check} skips every rule whose
+  * category has this name. The name is stored without verifying that such
+  * a category exists.
+  *
+  * @param categoryName
+  *          the name of the category to disable
+  */
+ public void disableCategory(final String categoryName) {
+   // TODO: check if such a category exists
+   this.disabledCategories.add(categoryName);
+ }
+
+ /**
+  * @return the language passed to the constructor of this instance
+  */
+ public Language getLanguage() {
+   return this.language;
+ }
+
+ /**
+  * @return the ids of the rules that have been explicitly disabled; this is
+  *         the internal set itself, not a copy
+  */
+ public Set<String> getDisabledRules() {
+   return this.disabledRules;
+ }
+
+ /**
+  * Enable a rule that is switched off by default. Rules reporting
+  * <code>isDefaultOff()</code> only run when their id was registered here.
+  *
+  * @param ruleId
+  *          the id of the turned off rule to enable.
+  */
+ public void enableDefaultOffRule(final String ruleId) {
+   this.enabledRules.add(ruleId);
+ }
+
+ /**
+  * @return the names of the categories that have been explicitly disabled;
+  *         this is the internal set itself, not a copy
+  */
+ public Set<String> getDisabledCategories() {
+   return this.disabledCategories;
+ }
+
+ /**
+  * Re-enable a given rule so {@link #check} will use it. Ids that were
+  * never disabled are ignored.
+  *
+  * @param ruleId
+  *          the id of the rule to enable
+  */
+ public void enableRule(final String ruleId) {
+   // removing an absent id is a no-op, so no membership test is needed
+   disabledRules.remove(ruleId);
+ }
+
+ /**
+  * Splits the text with the sentence tokenizer of the configured language.
+  *
+  * @param text the text to split
+  * @return the tokenized sentences
+  */
+ public List<String> sentenceTokenize(final String text) {
+   return this.sentenceTokenizer.tokenize(text);
+ }
+
+ /**
+  * The main check method. Tokenizes the text into sentences and matches these
+  * sentences against all currently active rules, running all kinds of rules
+  * ({@link paragraphHandling#NORMAL}).
+  *
+  * @param text
+  *          the text to check
+  * @return a List of {@link RuleMatch} objects
+  * @throws IOException
+  */
+ public List<RuleMatch> check(final String text) throws IOException {
+   final boolean tokenizeIntoSentences = true;
+   return check(text, tokenizeIntoSentences, paragraphHandling.NORMAL);
+ }
+
+
+ /**
+ * The main check method. Tokenizes the text into sentences and matches these
+ * sentences against all currently active rules. Match positions are
+ * converted so that they refer to the complete text.
+ *
+ * @param text
+ * the text to check
+ * @param tokenizeText
+ * If true, then the text is tokenized into sentences.
+ * Otherwise, the whole text is treated as a single sentence.
+ * @param paraMode
+ * Selects whether all rules, only paragraph-level rules or only
+ * sentence-level rules are run.
+ *
+ * @return a List of {@link RuleMatch} objects
+ * @throws IOException
+ */
+ public List<RuleMatch> check(final String text, boolean tokenizeText, final paragraphHandling paraMode) throws IOException {
+ sentenceCount = 0; // restart the counter reported by getSentenceCount()
+ final List<String> sentences;
+ if (tokenizeText) {
+ sentences = sentenceTokenize(text);
+ } else {
+ sentences = new ArrayList<String>();
+ sentences.add(text);
+ }
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ final List<Rule> allRules = getAllRules(); // also resets the state of every rule
+ printIfVerbose(allRules.size() + " rules activated for language "
+ + language);
+ int charCount = 0; // characters of the sentences handled so far
+ int lineCount = 0; // line breaks in the sentences handled so far
+ int columnCount = 1;
+ unknownWords = new HashSet<String>(); // drop the unknown words of earlier calls
+ for (final String sentence : sentences) {
+ sentenceCount++;
+ AnalyzedSentence analyzedText = getAnalyzedSentence(sentence);
+ rememberUnknownWords(analyzedText);
+
+ if (sentenceCount == sentences.size()) { // last sentence: tag its last token as paragraph end
+ final AnalyzedTokenReadings[] anTokens = analyzedText.getTokens();
+ anTokens[anTokens.length - 1].setParaEnd();
+ analyzedText = new AnalyzedSentence(anTokens);
+ }
+
+ printIfVerbose(analyzedText.toString());
+ final List<RuleMatch> sentenceMatches =
+ checkAnalyzedSentence(paraMode, allRules, charCount, lineCount,
+ columnCount, sentence, analyzedText);
+
+ Collections.sort(sentenceMatches);
+ ruleMatches.addAll(sentenceMatches);
+ charCount += sentence.length();
+ lineCount += countLineBreaks(sentence);
+
+ // calculate the start column for matches in the next sentence:
+ final int lineBreakPos = sentence.indexOf('\n');
+ if (lineBreakPos == -1) {
+ columnCount += sentence.length() -1;
+ } else {
+ if (lineBreakPos == 0) {
+ columnCount = sentence.length();
+ if (!language.getSentenceTokenizer().
+ singleLineBreaksMarksPara()) {
+ columnCount--;
+ }
+ } else {
+ columnCount = 1;
+ }
+ }
+ }
+
+ if (!paraMode.equals(paragraphHandling.ONLYNONPARA)) {
+ // removing false positives in paragraph-level rules: drop every match
+ // that its rule reports through isInRemoved()
+ for (final Rule rule : allRules) {
+ if (rule.isParagraphBackTrack() && (rule.getMatches() != null)) {
+ final List<RuleMatch> rm = rule.getMatches();
+ for (final RuleMatch r : rm) {
+ if (rule.isInRemoved(r)) {
+ ruleMatches.remove(r);
+ }
+ }
+ }
+ }
+ }
+
+ return ruleMatches;
+ }
+
+ /**
+  * Runs the given rules on one analyzed sentence. Rules are skipped when
+  * they are disabled, off by default and not enabled, in a disabled
+  * category, or excluded by the paragraph mode. Each match is converted with
+  * {@link #adjustRuleMatchPos}; matches of rules with paragraph
+  * back-tracking are additionally handed back to their rule.
+  *
+  * @param paraMode which kinds of rules to run
+  * @param allRules the rules to consider
+  * @param tokenCount offset added to the match positions
+  * @param lineCount line number at the start of the sentence
+  * @param columnCount column number at the start of the sentence
+  * @param sentence the sentence text
+  * @param analyzedText the analyzed form of the sentence
+  * @return the adjusted matches in the order the rules produced them
+  * @throws IOException
+  */
+ public List<RuleMatch> checkAnalyzedSentence(final paragraphHandling paraMode,
+     final List<Rule> allRules, int tokenCount, int lineCount,
+     int columnCount, final String sentence, AnalyzedSentence analyzedText)
+     throws IOException {
+   final List<RuleMatch> found = new ArrayList<RuleMatch>();
+   for (final Rule rule : allRules) {
+     if (disabledRules.contains(rule.getId())
+         || (rule.isDefaultOff() && !enabledRules.contains(rule.getId()))
+         || disabledCategories.contains(rule.getCategory().getName())) {
+       continue;
+     }
+
+     switch (paraMode) {
+       case ONLYNONPARA:
+         if (rule.isParagraphBackTrack()) {
+           continue;
+         }
+         break;
+       case ONLYPARA:
+         if (!rule.isParagraphBackTrack()) {
+           continue;
+         }
+         break;
+       case NORMAL:
+       default:
+     }
+
+     for (final RuleMatch rawMatch : rule.match(analyzedText)) {
+       final RuleMatch adjusted = adjustRuleMatchPos(rawMatch,
+           tokenCount, columnCount, lineCount, sentence);
+       found.add(adjusted);
+       if (rule.isParagraphBackTrack()) {
+         rule.addRuleMatch(adjusted);
+       }
+     }
+   }
+   return found;
+ }
+
+ /**
+  * Change RuleMatch positions so they are relative to the complete text,
+  * not just to the sentence. A new RuleMatch is created; the given one is
+  * not modified.
+  *
+  * @param rm RuleMatch with sentence-relative positions
+  * @param sentLen Count of characters in front of the sentence
+  * @param columnCount Current column number
+  * @param lineCount Current line number
+  * @param sentence The text being checked
+  * @return
+  * The RuleMatch object with adjustments.
+  */
+ public RuleMatch adjustRuleMatchPos(final RuleMatch rm, int sentLen,
+     int columnCount, int lineCount, final String sentence) {
+   final int fromPos = rm.getFromPos();
+   final int toPos = rm.getToPos();
+   final RuleMatch thisMatch = new RuleMatch(rm.getRule(),
+       fromPos + sentLen, toPos + sentLen, rm.getMessage(),
+       rm.getShortMessage());
+   thisMatch.setSuggestedReplacements(rm.getSuggestedReplacements());
+
+   final String sentencePartToError = sentence.substring(0, fromPos);
+   final String sentencePartToEndOfError = sentence.substring(0, toPos);
+
+   final int lastLineBreakPos = sentencePartToError.lastIndexOf('\n');
+   final int column;
+   if (lastLineBreakPos == -1) {
+     column = sentencePartToError.length() + columnCount;
+   } else {
+     column = sentencePartToError.length() - lastLineBreakPos;
+   }
+
+   final int lastLineBreakPosInError = sentencePartToEndOfError
+       .lastIndexOf('\n');
+   final int endColumn;
+   if (lastLineBreakPosInError == -1) {
+     endColumn = sentencePartToEndOfError.length() + columnCount + 1;
+   } else {
+     // measure from the last line break before the END of the match; the
+     // break before its start is wrong when the match spans a line break
+     endColumn = sentencePartToEndOfError.length() - lastLineBreakPosInError;
+   }
+
+   thisMatch.setLine(lineCount + countLineBreaks(sentencePartToError));
+   thisMatch.setEndLine(lineCount + countLineBreaks(sentencePartToEndOfError));
+   thisMatch.setColumn(column);
+   thisMatch.setEndColumn(endColumn);
+   thisMatch.setOffset(fromPos + sentLen);
+   return thisMatch;
+ }
+
+ /**
+  * Adds the tokens of the sentence that count as unknown to
+  * <code>unknownWords</code>; does nothing unless listing of unknown words
+  * is switched on. A token counts as unknown when the string form of its
+  * reading list contains the text <code>null]</code>.
+  */
+ private void rememberUnknownWords(final AnalyzedSentence analyzedText) {
+   if (!listUnknownWords) {
+     return;
+   }
+   for (final AnalyzedTokenReadings readings : analyzedText
+       .getTokensWithoutWhitespace()) {
+     final String readingsText = readings.getReadings().toString();
+     if (readingsText.contains("null]")) {
+       unknownWords.add(readings.getToken());
+     }
+   }
+ }
+
+ /**
+  * Get the unknown words found by the last run of the check() method as a
+  * new, sorted list.
+  *
+  * @throws IllegalStateException
+  *           if listUnknownWords is set to <code>false</code>
+  */
+ public List<String> getUnknownWords() {
+   if (listUnknownWords) {
+     final List<String> sorted = new ArrayList<String>(unknownWords);
+     Collections.sort(sorted);
+     return sorted;
+   }
+   throw new IllegalStateException(
+       "listUnknownWords is set to false, unknown words not stored");
+ }
+
+ /**
+  * Counts the <code>\n</code> characters in the given string.
+  *
+  * @param s the string to scan, must not be null
+  * @return the number of line feed characters
+  */
+ static int countLineBreaks(final String s) {
+   int count = 0;
+   for (int pos = s.indexOf('\n'); pos != -1; pos = s.indexOf('\n', pos + 1)) {
+     count++;
+   }
+   return count;
+ }
+
+ /**
+  * Tokenizes the given <code>sentence</code> into words and analyzes it
+  * (see {@link #getRawAnalyzedSentence(String)}), then passes the result
+  * through the disambiguator of the language.
+  *
+  * @throws IOException
+  */
+ public AnalyzedSentence getAnalyzedSentence(final String sentence)
+     throws IOException {
+   final AnalyzedSentence rawSentence = getRawAnalyzedSentence(sentence);
+   return disambiguator.disambiguate(rawSentence);
+ }
+
+ /**
+ * Tokenizes the given <code>sentence</code> into words and analyzes it.
+ * The result starts with an empty token tagged SENT_START, the last
+ * non-whitespace token is tagged SENT_END, and a final line break token is
+ * additionally tagged PARA_END.
+ *
+ * @since 0.9.8
+ * @param sentence
+ * Sentence to be analyzed
+ * @return
+ * AnalyzedSentence
+ * @throws IOException
+ */
+ public AnalyzedSentence getRawAnalyzedSentence(final String sentence) throws IOException {
+ final List<String> tokens = wordTokenizer.tokenize(sentence);
+ final Map<Integer, String> softHyphenTokens = new HashMap<Integer, String>(); // token index -> original text
+
+ //for soft hyphens inside words, happens especially in OOo:
+ for (int i = 0; i < tokens.size(); i++) {
+ if (tokens.get(i).indexOf('\u00ad') != -1) {
+ softHyphenTokens.put(i, tokens.get(i));
+ tokens.set(i, tokens.get(i).replaceAll("\u00ad", "")); // tag the word without soft hyphens
+ }
+ }
+
+ final List<AnalyzedTokenReadings> aTokens = tagger.tag(tokens);
+ final int numTokens = aTokens.size();
+ int posFix = 0;
+ for (int i = 1; i < numTokens; i++) { // NOTE(review): starts at 1, so a soft hyphen in token 0 is not restored - confirm intended
+ aTokens.get(i).setWhitespaceBefore(aTokens.get(i - 1).isWhitespace());
+ aTokens.get(i).setStartPos(aTokens.get(i).getStartPos() + posFix); // NOTE(review): overwritten by setStartPos(startPos) in the loop below
+ if (!softHyphenTokens.isEmpty()) {
+ if (softHyphenTokens.get(i) != null) {
+ aTokens.get(i).addReading(tagger.createToken(softHyphenTokens.get(i), null)); // re-add the original text as a reading without POS tag
+ posFix += softHyphenTokens.get(i).length() - aTokens.get(i).getToken().length();
+ }
+ }
+ }
+
+ final AnalyzedTokenReadings[] tokenArray = new AnalyzedTokenReadings[tokens
+ .size() + 1]; // one extra slot for the sentence start token
+ final AnalyzedToken[] startTokenArray = new AnalyzedToken[1];
+ int toArrayCount = 0;
+ final AnalyzedToken sentenceStartToken = new AnalyzedToken("", SENTENCE_START_TAGNAME, null);
+ startTokenArray[0] = sentenceStartToken;
+ tokenArray[toArrayCount++] = new AnalyzedTokenReadings(startTokenArray, 0);
+ int startPos = 0;
+ for (final AnalyzedTokenReadings posTag : aTokens) {
+ posTag.setStartPos(startPos); // positions are the running sum of the token lengths
+ tokenArray[toArrayCount++] = posTag;
+ startPos += posTag.getToken().length();
+ }
+
+ // add additional tags
+ int lastToken = toArrayCount - 1;
+ // make SENT_END appear at last not whitespace token
+ for (int i = 0; i < toArrayCount - 1; i++) {
+ if (!tokenArray[lastToken - i].isWhitespace()) {
+ lastToken -= i;
+ break;
+ }
+ }
+
+ tokenArray[lastToken].setSentEnd();
+
+ if (tokenArray.length == lastToken + 1 && tokenArray[lastToken].isLinebreak()) { // SENT_END token is the very last token and a line break
+ tokenArray[lastToken].setParaEnd();
+ }
+ return new AnalyzedSentence(tokenArray);
+ }
+
+ /**
+  * Get all rules for the current language that are built-in or that have been
+  * added using {@link #addRule}. Every returned rule is reset as a side
+  * effect of this call.
+  * @return a new List of {@link Rule} objects, built-in rules first
+  */
+ public List<Rule> getAllRules() {
+   final List<Rule> combined = new ArrayList<Rule>(builtinRules);
+   combined.addAll(userRules);
+   // Rules may keep state across sentence boundaries. It is cleared here so
+   // that data from one text is not applied to another one, although keeping
+   // it could be useful when checking a continuous text.
+   for (final Rule rule : combined) {
+     rule.reset();
+   }
+   return combined;
+ }
+
+ /**
+  * @return the number of sentences the latest call to check() has checked
+  */
+ public int getSentenceCount() {
+   return this.sentenceCount;
+ }
+
+ /**
+  * Prints the line to the verbose output stream; does nothing when no
+  * stream has been set.
+  */
+ private void printIfVerbose(final String s) {
+   if (printStream == null) {
+     return;
+   }
+   printStream.println(s);
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/Language.java b/JLanguageTool/src/java/de/danielnaber/languagetool/Language.java
new file mode 100644
index 0000000..a565058
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/Language.java
@@ -0,0 +1,336 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import de.danielnaber.languagetool.language.Belarusian;
+import de.danielnaber.languagetool.language.Catalan;
+import de.danielnaber.languagetool.language.Contributor;
+import de.danielnaber.languagetool.language.Demo;
+import de.danielnaber.languagetool.language.Danish;
+import de.danielnaber.languagetool.language.Bokmal;
+import de.danielnaber.languagetool.language.Dutch;
+import de.danielnaber.languagetool.language.English;
+import de.danielnaber.languagetool.language.Esperanto;
+import de.danielnaber.languagetool.language.French;
+import de.danielnaber.languagetool.language.Galician;
+import de.danielnaber.languagetool.language.German;
+import de.danielnaber.languagetool.language.Icelandic;
+import de.danielnaber.languagetool.language.Italian;
+import de.danielnaber.languagetool.language.Lithuanian;
+import de.danielnaber.languagetool.language.Malayalam;
+import de.danielnaber.languagetool.language.Polish;
+import de.danielnaber.languagetool.language.Romanian;
+import de.danielnaber.languagetool.language.Russian;
+import de.danielnaber.languagetool.language.Slovak;
+import de.danielnaber.languagetool.language.Slovenian;
+import de.danielnaber.languagetool.language.Spanish;
+import de.danielnaber.languagetool.language.Swedish;
+import de.danielnaber.languagetool.language.Ukrainian;
+import de.danielnaber.languagetool.rules.patterns.Unifier;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.xx.DemoDisambiguator;
+import de.danielnaber.languagetool.tagging.xx.DemoTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Base class for any supported language (English, German, etc).
+ *
+ * @author Daniel Naber
+ */
+public abstract class Language {
+
+ // NOTE: keep in sync with array below!
+ //public final static Language CZECH = new Czech();
+ public static final Language DANISH = new Danish();
+ public static final Language BOKMAL = new Bokmal();
+ public static final Language DUTCH = new Dutch();
+ public static final Language ENGLISH = new English();
+ public static final Language ESPERANTO = new Esperanto();
+ public static final Language FRENCH = new French();
+ public static final Language GERMAN = new German();
+ public static final Language ITALIAN = new Italian();
+ public static final Language LITHUANIAN = new Lithuanian();
+ public static final Language POLISH = new Polish();
+ public static final Language SLOVAK = new Slovak();
+ public static final Language SLOVENIAN = new Slovenian();
+ public static final Language SPANISH = new Spanish();
+ public static final Language SWEDISH = new Swedish();
+ public static final Language UKRAINIAN = new Ukrainian();
+ public static final Language RUSSIAN = new Russian();
+ public static final Language ROMANIAN = new Romanian();
+ public static final Language ICELANDIC = new Icelandic();
+ public static final Language GALICIAN = new Galician();
+ public static final Language CATALAN = new Catalan();
+ public static final Language MALAYALAM = new Malayalam();
+ public static final Language BELARUSIAN = new Belarusian();
+
+ public static final Language DEMO = new Demo();
+
+ private static List<Language> externalLanguages = new ArrayList<Language>();
+
+ /**
+ * All languages supported by LanguageTool.
+ */
+ public static Language[] LANGUAGES = {
+ ENGLISH, GERMAN, POLISH, FRENCH, SPANISH, ITALIAN, DUTCH, LITHUANIAN, UKRAINIAN, RUSSIAN,
+ SLOVAK, SLOVENIAN, SWEDISH, ROMANIAN, ICELANDIC, GALICIAN, CATALAN, DANISH,
+ MALAYALAM, BELARUSIAN, ESPERANTO, BOKMAL,
+ DEMO
+ // FIXME: load dynamically from classpath
+ };
+
+ /**
+ * All languages supported by LanguageTool, but without the demo language.
+ * The array is sized one element smaller than {@link #LANGUAGES}, so it is
+ * filled exactly only while {@link #DEMO} occurs exactly once in that array.
+ * It is computed once during class initialization; {@link #reInit} does not
+ * update it.
+ */
+ public static Language[] REAL_LANGUAGES = new Language[LANGUAGES.length-1];
+ static {
+ int i = 0;
+ // copy every entry except the demo language, keeping the original order
+ for (final Language lang : LANGUAGES) {
+ if (lang != DEMO) {
+ REAL_LANGUAGES[i] = lang;
+ i++;
+ }
+ }
+ }
+
+ private static final Language[] BUILTIN_LANGUAGES = LANGUAGES;
+
+ private static final Disambiguator DEMO_DISAMBIGUATOR = new DemoDisambiguator();
+ private static final Tagger DEMO_TAGGER = new DemoTagger();
+ private static final SentenceTokenizer SENTENCE_TOKENIZER = new SentenceTokenizer();
+ private static final WordTokenizer WORD_TOKENIZER = new WordTokenizer();
+ private static final Unifier MATCH_UNIFIER = new Unifier();
+
+ // -------------------------------------------------------------------------
+
+ /**
+ * Get this language's two character code, e.g. <code>en</code> for English.
+ * @return String - language code
+ */
+ public abstract String getShortName();
+
+ /**
+ * Get this language's name in English, e.g. <code>English</code> or <code>German</code>.
+ * @return String - language name
+ */
+ public abstract String getName();
+
+ /**
+ * Get this language's variants, e.g. <code>US</code> (as in <code>en_US</code>) or
+ * <code>PL</code> (as in <code>pl_PL</code>).
+ * @return String[] - array of country variants for the language.
+ */
+ public abstract String[] getCountryVariants();
+
+ /**
+ * Get this language's Java locale.
+ */
+ public abstract Locale getLocale();
+
+ /**
+ * Get the name(s) of the maintainer(s) for this language or <code>null</code>.
+ */
+ public abstract Contributor[] getMaintainers();
+
+ /**
+ * Get the IDs of the global rules that should run for texts in this language
+ * or <code>null</code>.
+ */
+ public abstract Set<String> getRelevantRuleIDs();
+
+ // -------------------------------------------------------------------------
+
+ /**
+  * Builds the location of this language's rule file: the rules directory
+  * reported by the data broker, this language's short name and the pattern
+  * file name, separated by slashes.
+  */
+ public String getRuleFileName() {
+   final StringBuilder location = new StringBuilder();
+   location.append(JLanguageTool.getDataBroker().getRulesDir());
+   location.append("/").append(getShortName());
+   location.append("/").append(JLanguageTool.PATTERN_FILE);
+   return location.toString();
+ }
+
+ /**
+ * Get this language's part-of-speech disambiguator implementation.
+ * This base implementation returns the shared {@link DemoDisambiguator}
+ * instance.
+ */
+ public Disambiguator getDisambiguator() {
+ return DEMO_DISAMBIGUATOR;
+ }
+
+ /**
+ * Get this language's part-of-speech tagger implementation.
+ * This base implementation returns the shared {@link DemoTagger} instance.
+ */
+ public Tagger getTagger() {
+ return DEMO_TAGGER;
+ }
+
+ /**
+ * Get this language's sentence tokenizer implementation.
+ * This base implementation returns a single static {@link SentenceTokenizer}
+ * instance, so every language relying on it receives the same object and
+ * any setting changed on that object is visible to all of them.
+ */
+ public SentenceTokenizer getSentenceTokenizer() {
+ return SENTENCE_TOKENIZER;
+ }
+
+ /**
+ * Get this language's word tokenizer implementation.
+ * This base implementation returns a single static {@link WordTokenizer}
+ * instance.
+ */
+ public Tokenizer getWordTokenizer() {
+ return WORD_TOKENIZER;
+ }
+
+ /**
+ * Get this language's part-of-speech synthesizer implementation or <code>null</code>.
+ * This base implementation always returns <code>null</code>.
+ */
+ public Synthesizer getSynthesizer() {
+ return null;
+ }
+
+ /**
+ * Get this language's feature unifier.
+ * This base implementation returns a single static {@link Unifier} instance,
+ * the same one that {@link #getDisambiguationUnifier()} returns.
+ * @return Feature unifier for analyzed tokens.
+ */
+ public Unifier getUnifier() {
+ return MATCH_UNIFIER;
+ }
+
+ /**
+ * Get this language's feature unifier used for disambiguation.
+ * Note: it might be different from the normal rule unifier. This base
+ * implementation, however, returns the same static instance as
+ * {@link #getUnifier()}.
+ * @return Feature unifier for analyzed tokens.
+ */
+ public Unifier getDisambiguationUnifier() {
+ return MATCH_UNIFIER;
+ }
+
+ /**
+ * Get the name of the language translated to the current locale,
+ * if available. Otherwise, get the untranslated name.
+ */
+ public final String getTranslatedName(final ResourceBundle messages) {
+ try {
+ return messages.getString(getShortName());
+ } catch (final MissingResourceException e) {
+ return getName();
+ }
+ }
+
+ // -------------------------------------------------------------------------
+
+ /**
+  * Rebuilds {@link #LANGUAGES} as the built-in languages followed by the
+  * given ones, and remembers the given list as the external languages.
+  */
+ public static void reInit(final List<Language> languages) {
+   final int builtinCount = BUILTIN_LANGUAGES.length;
+   LANGUAGES = new Language[builtinCount + languages.size()];
+   System.arraycopy(BUILTIN_LANGUAGES, 0, LANGUAGES, 0, builtinCount);
+   // append the additional languages behind the built-in ones
+   int nextSlot = builtinCount;
+   for (final Language added : languages) {
+     LANGUAGES[nextSlot] = added;
+     nextSlot++;
+   }
+   externalLanguages = languages;
+ }
+
+ /**
+ * Return languages that are not built-in but have been added manually.
+ * The returned list is the internal one (initially empty, afterwards the
+ * very list instance passed to {@link #reInit}), not a copy.
+ */
+ public static List<Language> getExternalLanguages() {
+ return externalLanguages;
+ }
+
+ /**
+ * Get the Language object for the given short language name.
+ *
+ * @param shortLanguageCode e.g. <code>en</code> or <code>de</code>
+ * @return a Language object or <code>null</code>
+ */
+ public static Language getLanguageForShortName(final String shortLanguageCode) {
+ StringTools.assureSet(shortLanguageCode, "shortLanguageCode");
+ if (shortLanguageCode.length() != "xx".length()) {
+ throw new IllegalArgumentException("'" + shortLanguageCode + "' isn't a two-character code");
+ }
+ for (Language element : Language.LANGUAGES) {
+ if (shortLanguageCode.equals(element.getShortName())) {
+ return element;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Get the Language object for the given language name.
+ *
+ * @param languageName e.g. <code>English</code> or <code>German</code> (case is significant)
+ * @return a Language object or <code>null</code>
+ */
+ public static Language getLanguageForName(final String languageName) {
+ for (Language element : Language.LANGUAGES) {
+ if (languageName.equals(element.getName())) {
+ return element;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Returns the same value as {@link #getName()}.
+ */
+ @Override
+ public final String toString() {
+ return getName();
+ }
+
+ /**
+  * Get sorted info about all maintainers.
+  * @since 0.9.9
+  * @param messages
+  *      {@link ResourceBundle} language bundle used to translate
+  *      the language names
+  * @return
+  *      One line per language that has maintainers, in the form
+  *      <code>language name: maintainer, maintainer</code>; the lines are
+  *      sorted and each one ends with a newline. The demo language is left out.
+  */
+ public static String getAllMaintainers(final ResourceBundle messages) {
+   final List<String> toSort = new ArrayList<String>();
+   for (final Language lang : Language.LANGUAGES) {
+     if (lang == Language.DEMO) {
+       continue;
+     }
+     final Contributor[] maintainers = lang.getMaintainers();
+     if (maintainers == null) {
+       continue;
+     }
+     final List<String> names = new ArrayList<String>();
+     for (final Contributor contributor : maintainers) {
+       names.add(contributor.getName());
+     }
+     // getTranslatedName() falls back to the untranslated name, so a language
+     // without an entry in the bundle (e.g. one added via reInit) no longer
+     // aborts the whole listing with a MissingResourceException
+     toSort.add(lang.getTranslatedName(messages) +
+         ": " + StringTools.listToString(names, ", "));
+   }
+   Collections.sort(toSort);
+   final StringBuilder maintainersInfo = new StringBuilder();
+   for (final String lElem : toSort) {
+     maintainersInfo.append(lElem);
+     maintainersInfo.append('\n');
+   }
+   return maintainersInfo.toString();
+ }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/Main.java b/JLanguageTool/src/java/de/danielnaber/languagetool/Main.java
new file mode 100644
index 0000000..f2f2cc6
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/Main.java
@@ -0,0 +1,567 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.bitext.StringPair;
+import de.danielnaber.languagetool.bitext.TabBitextReader;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.bitext.BitextRule;
+import de.danielnaber.languagetool.tools.StringTools;
+import de.danielnaber.languagetool.tools.Tools;
+
+/**
+ * The command line tool to check plain text files.
+ *
+ * @author Daniel Naber
+ */
+class Main {
+
+ private JLanguageTool lt;
+ private final boolean verbose;
+ private final boolean apiFormat;
+ private final boolean taggerOnly;
+ private final boolean applySuggestions;
+ private boolean profileRules;
+ private boolean bitextMode;
+ private JLanguageTool srcLt;
+ List<BitextRule> bRules;
+ private Rule currentRule;
+
+ /* maximum file size to read in a single read */
+ private static final int MAX_FILE_SIZE = 64000;
+
+ /**
+  * Creates the command line checker: stores the output options, starts with
+  * profiling and bitext mode switched off, and sets up a JLanguageTool for
+  * the given languages with the default pattern and false friend rules
+  * activated and the requested rule selection applied.
+  */
+ Main(final boolean verbose, final boolean taggerOnly,
+     final Language language, final Language motherTongue,
+     final String[] disabledRules, final String[] enabledRules,
+     final boolean apiFormat, boolean applySuggestions) throws IOException,
+     SAXException, ParserConfigurationException {
+   // output options
+   this.verbose = verbose;
+   this.apiFormat = apiFormat;
+   this.taggerOnly = taggerOnly;
+   this.applySuggestions = applySuggestions;
+   // optional modes are off until requested explicitly
+   this.profileRules = false;
+   this.bitextMode = false;
+   this.srcLt = null;
+   this.bRules = null;
+   // the checker itself
+   this.lt = new JLanguageTool(language, motherTongue);
+   this.lt.activateDefaultPatternRules();
+   this.lt.activateDefaultFalseFriendRules();
+   selectRules(this.lt, disabledRules, enabledRules);
+ }
+
+ /**
+  * Applies the rule selection to the given checker: disables every rule in
+  * <code>disabledRules</code>; if <code>enabledRules</code> is not empty,
+  * enables exactly those rules and disables all other rules.
+  */
+ private void selectRules(final JLanguageTool lt, final String[] disabledRules, final String[] enabledRules) {
+   // rules that were switched off explicitly
+   for (int idx = 0; idx < disabledRules.length; idx++) {
+     lt.disableRule(disabledRules[idx]);
+   }
+   if (enabledRules.length == 0) {
+     // no explicit selection: leave all remaining rules as they are
+     return;
+   }
+   final Set<String> wantedIds = new HashSet<String>(Arrays.asList(enabledRules));
+   for (final String wantedId : wantedIds) {
+     lt.enableDefaultOffRule(wantedId);
+     lt.enableRule(wantedId);
+   }
+   // everything that was not selected gets disabled
+   for (final Rule candidate : lt.getAllRules()) {
+     final String candidateId = candidate.getId();
+     if (!wantedIds.contains(candidateId)) {
+       lt.disableRule(candidateId);
+     }
+   }
+ }
+
+ /** Forwards the flag to the current JLanguageTool instance. */
+ private void setListUnknownWords(final boolean listUnknownWords) {
+ lt.setListUnknownWords(listUnknownWords);
+ }
+
+ /** Switches rule profiling on; there is no way to switch it off again. */
+ private void setProfilingMode() {
+ profileRules = true;
+ }
+
+ /**
+  * Switches to bitext mode: replaces the checker by a new one for the current
+  * target language (without mother tongue), creates a checker for the source
+  * language, applies the rule selection to both and loads the bitext rules,
+  * filtered by the same disabled/enabled rule IDs.
+  */
+ private final void setBitextMode(final Language sourceLang,
+     final String[] disabledRules, final String[] enabledRules) throws IOException, ParserConfigurationException, SAXException {
+   bitextMode = true;
+   final Language targetLang = lt.getLanguage();
+   lt = new JLanguageTool(targetLang, null);
+   srcLt = new JLanguageTool(sourceLang);
+   lt.activateDefaultPatternRules();
+   selectRules(lt, disabledRules, enabledRules);
+   selectRules(srcLt, disabledRules, enabledRules);
+   bRules = Tools.getBitextRules(sourceLang, lt.getLanguage());
+
+   // drop every bitext rule whose ID was disabled explicitly
+   final List<BitextRule> remaining = new ArrayList<BitextRule>(bRules);
+   for (final BitextRule candidate : bRules) {
+     for (int idx = 0; idx < disabledRules.length; idx++) {
+       if (candidate.getId().equals(disabledRules[idx])) {
+         remaining.remove(candidate);
+       }
+     }
+   }
+   bRules = remaining;
+
+   if (enabledRules.length == 0) {
+     return;
+   }
+   // an explicit selection was given: keep only those rules, in the order
+   // in which their IDs were listed
+   final List<BitextRule> selected = new ArrayList<BitextRule>();
+   for (int idx = 0; idx < enabledRules.length; idx++) {
+     for (final BitextRule candidate : bRules) {
+       if (candidate.getId().equals(enabledRules[idx])) {
+         selected.add(candidate);
+       }
+     }
+   }
+   bRules = selected;
+ }
+
+ /** Returns the checker currently in use (it is replaced when bitext mode is set). */
+ JLanguageTool getJLanguageTool() {
+ return lt;
+ }
+
+ private void runOnFile(final String filename, final String encoding,
+ final boolean listUnknownWords) throws IOException {
+ boolean oneTime = false;
+ if (!"-".equals(filename)) {
+ final File file = new File(filename);
+ // run once on file if the file size < MAXFILESIZE or
+ // when we use the bitext mode (we use a bitext reader
+ // instead of a direct file access)
+ oneTime = file.length() < MAX_FILE_SIZE || bitextMode;
+ }
+ if (oneTime) {
+ if (bitextMode) {
+ //TODO: add parameter to set different readers
+ TabBitextReader reader = new TabBitextReader(filename, encoding);
+ if (applySuggestions) {
+ Tools.correctBitext(reader, srcLt, lt, bRules);
+ } else {
+ Tools.checkBitext(reader, srcLt, lt, bRules,
+ apiFormat);
+ }
+ } else {
+ final String text = getFilteredText(filename, encoding);
+ if (applySuggestions) {
+ System.out.print(Tools.correctText(text, lt));
+ } else if (profileRules) {
+ Tools.profileRulesOnText(text, lt);
+ } else if (!taggerOnly) {
+ Tools.checkText(text, lt, apiFormat, 0);
+ } else {
+ Tools.tagText(text, lt);
+ }
+ if (listUnknownWords) {
+ System.out.println("Unknown words: " + lt.getUnknownWords());
+ }
+ }
+ } else {
+ if (verbose) {
+ lt.setOutput(System.err);
+ }
+ if (!apiFormat && !applySuggestions) {
+ if ("-".equals(filename)) {
+ System.out.println("Working on STDIN...");
+ } else {
+ System.out.println("Working on " + filename + "...");
+ }
+ }
+ int runCount = 1;
+ final List<Rule> rules = lt.getAllRules();
+ if (profileRules) {
+ System.out.printf("Testing %d rules\n", rules.size());
+ System.out.println("Rule ID\tTime\tSentences\tMatches\tSentences per sec.");
+ runCount = rules.size();
+ }
+ InputStreamReader isr = null;
+ BufferedReader br = null;
+ int lineOffset = 0;
+ int tmpLineOffset = 0;
+ final List<String> unknownWords = new ArrayList<String>();
+ StringBuilder sb = new StringBuilder();
+ for (int ruleIndex = 0; ruleIndex <runCount; ruleIndex++) {
+ currentRule = rules.get(ruleIndex);
+ int matches = 0;
+ long sentences = 0;
+ final long startTime = System.currentTimeMillis();
+ try {
+ if (!"-".equals(filename)) {
+ final File file = new File(filename);
+ if (encoding != null) {
+ isr = new InputStreamReader(new BufferedInputStream(
+ new FileInputStream(file.getAbsolutePath())), encoding);
+ } else {
+ isr = new InputStreamReader(new BufferedInputStream(
+ new FileInputStream(file.getAbsolutePath())));
+ }
+ } else {
+ if (encoding != null) {
+ isr = new InputStreamReader(new BufferedInputStream(System.in),
+ encoding);
+ } else {
+ isr = new InputStreamReader(new BufferedInputStream(System.in));
+ }
+ }
+ br = new BufferedReader(isr);
+ String line;
+ while ((line = br.readLine()) != null) {
+ sb.append(line);
+ sb.append('\n');
+ tmpLineOffset++;
+ if (lt.getLanguage().getSentenceTokenizer()
+ .singleLineBreaksMarksPara()) {
+ matches = handleLine(matches, lineOffset, sb);
+ sentences += lt.getSentenceCount();
+ if (profileRules) {
+ sentences += lt.sentenceTokenize(sb.toString()).size();
+ }
+ if (listUnknownWords && !taggerOnly) {
+ for (String word : lt.getUnknownWords())
+ if (!unknownWords.contains(word)) {
+ unknownWords.add(word);
+ }
+ }
+ sb = new StringBuilder();
+ lineOffset = tmpLineOffset;
+ } else {
+ if ("".equals(line) || sb.length() >= MAX_FILE_SIZE) {
+ matches = handleLine(matches, lineOffset, sb);
+ sentences += lt.getSentenceCount();
+ if (profileRules) {
+ sentences += lt.sentenceTokenize(sb.toString()).size();
+ }
+ if (listUnknownWords && !taggerOnly) {
+ for (String word : lt.getUnknownWords())
+ if (!unknownWords.contains(word)) {
+ unknownWords.add(word);
+ }
+ }
+ sb = new StringBuilder();
+ lineOffset = tmpLineOffset;
+ }
+ }
+ }
+ } finally {
+
+ if (sb.length() > 0) {
+ matches = handleLine(matches, tmpLineOffset - 1, sb);
+ sentences += lt.getSentenceCount();
+ if (profileRules) {
+ sentences += lt.sentenceTokenize(sb.toString()).size();
+ }
+ if (listUnknownWords && !taggerOnly) {
+ for (String word : lt.getUnknownWords())
+ if (!unknownWords.contains(word)) {
+ unknownWords.add(word);
+ }
+ }
+ }
+
+ printTimingInformation(listUnknownWords, rules, unknownWords, ruleIndex, matches, sentences, startTime);
+
+ if (br != null) {
+ br.close();
+ }
+ if (isr != null) {
+ isr.close();
+ }
+ }
+ }
+ }
+ }
+
+ /**
+  * Prints the statistics of one run: either a tab-separated profiling line
+  * for the rule at <code>ruleIndex</code> or a summary with time and sentence
+  * throughput, optionally followed by the sorted unknown words. In API format
+  * the output is wrapped in an XML comment. Prints nothing when suggestions
+  * are applied automatically.
+  */
+ private void printTimingInformation(final boolean listUnknownWords, final List<Rule> rules,
+     final List<String> unknownWords, final int ruleIndex, final int matches, final long sentences, final long startTime) {
+   if (applySuggestions) {
+     return;
+   }
+   final long time = System.currentTimeMillis() - startTime;
+   final float timeInSeconds = time / 1000.0f;
+   final float sentencesPerSecond = sentences / timeInSeconds;
+   if (apiFormat) {
+     System.out.println("<!--");
+   }
+   if (!profileRules) {
+     System.out.printf(Locale.ENGLISH,
+         "Time: %dms for %d sentences (%.1f sentences/sec)", time,
+         sentences, sentencesPerSecond);
+   } else {
+     //TODO: run 10 times, line in runOnce mode, and use median
+     System.out.printf(Locale.ENGLISH,
+         "%s\t%d\t%d\t%d\t%.1f", rules.get(ruleIndex).getId(),
+         time, sentences, matches, sentencesPerSecond);
+   }
+   System.out.println();
+   if (listUnknownWords) {
+     Collections.sort(unknownWords);
+     System.out.println("Unknown words: " + unknownWords);
+   }
+   if (apiFormat) {
+     System.out.println("-->");
+   }
+ }
+
+ private int handleLine(final int matchNo, final int lineOffset,
+ final StringBuilder sb) throws IOException {
+ int matches = matchNo;
+ if (applySuggestions) {
+ System.out.print(Tools.correctText(StringTools.filterXML(sb.toString()),
+ lt));
+ } else if (profileRules) {
+ matches += Tools.profileRulesOnLine(StringTools.filterXML(sb.toString()),
+ lt, currentRule);
+ } else if (!taggerOnly) {
+ if (matches == 0) {
+ matches += Tools.checkText(StringTools.filterXML(sb.toString()), lt,
+ apiFormat, -1, lineOffset, matches,
+ StringTools.XmlPrintMode.START_XML);
+ } else {
+ matches += Tools.checkText(StringTools.filterXML(sb.toString()), lt,
+ apiFormat, -1, lineOffset, matches,
+ StringTools.XmlPrintMode.CONTINUE_XML);
+ }
+ } else {
+ Tools.tagText(StringTools.filterXML(sb.toString()), lt);
+ }
+ return matches;
+ }
+
+ /**
+  * Runs the check on every file below the given directory, descending into
+  * sub-directories.
+  *
+  * @param filename path of the directory to process
+  * @param encoding character encoding of the files, passed on unchanged
+  * @param listUnknown whether unknown words are listed, passed on unchanged
+  * @throws IllegalArgumentException if <code>filename</code> is not a directory
+  * @throws IOException if the directory cannot be listed or a file cannot be read
+  */
+ private void runRecursive(final String filename, final String encoding,
+     final boolean listUnknown) throws IOException,
+     ParserConfigurationException, SAXException {
+   final File dir = new File(filename);
+   if (!dir.isDirectory()) {
+     throw new IllegalArgumentException(dir.getAbsolutePath()
+         + " is not a directory, cannot use recursion");
+   }
+   final File[] files = dir.listFiles();
+   if (files == null) {
+     // listFiles() returns null when an I/O error occurs (e.g. the directory
+     // is not readable); report that instead of failing with an NPE below
+     throw new IOException("Could not list the contents of directory "
+         + dir.getAbsolutePath());
+   }
+   for (final File file : files) {
+     if (file.isDirectory()) {
+       runRecursive(file.getAbsolutePath(), encoding, listUnknown);
+     } else {
+       runOnFile(file.getAbsolutePath(), encoding, listUnknown);
+     }
+   }
+ }
+
+ /**
+  * Loads filename and filters out XML. Note that the XML
+  * filtering can lead to incorrect positions in the list of matching rules.
+  *
+  * @param filename path of the file to load
+  * @param encoding character encoding, passed on to the file reader unchanged
+  * @return the file's contents after XML filtering
+  * @throws IOException if the file cannot be opened or read
+  */
+ private String getFilteredText(final String filename, final String encoding)
+     throws IOException {
+   if (verbose) {
+     lt.setOutput(System.err);
+   }
+   if (!apiFormat && !applySuggestions) {
+     System.out.println("Working on " + filename + "...");
+   }
+   final FileInputStream input = new FileInputStream(filename);
+   final String fileContents;
+   try {
+     fileContents = StringTools.readFile(input, encoding);
+   } finally {
+     // make sure the file is released even if reading fails; closing a
+     // stream that has already been closed has no effect
+     input.close();
+   }
+   return StringTools.filterXML(fileContents);
+ }
+
+ /**
+  * Prints the command line usage to standard output and terminates the JVM
+  * with exit code 1.
+  */
+ private static void exitWithUsageMessage() {
+   // lists every option that main() accepts, including -p|--profile, and
+   // shows a value placeholder for each option that consumes an argument
+   System.out
+       .println("Usage: java de.danielnaber.languagetool.Main "
+           + "[-r|--recursive] [-v|--verbose] [-l|--language LANG] [-m|--mothertongue LANG] [-d|--disable RULES] "
+           + "[-e|--enable RULES] [-c|--encoding ENCODING] [-u|--list-unknown] [-t|--taggeronly] [-b] [--api] [-a|--apply] "
+           + "[-p|--profile] [-b2|--bitext] <file>");
+   System.exit(1);
+ }
+
+ /**
+  * Command line tool to check plain text files.
+  * <p>
+  * Parses the options, creates the checker and runs it on the file given as
+  * the last argument (STDIN if no file is given). Prints the usage message and
+  * exits if there are no arguments, an option is unknown, or an option that
+  * needs a value is the last argument.
+  *
+  * @throws IllegalArgumentException if mutually exclusive options are combined
+  */
+ public static void main(final String[] args) throws IOException,
+     ParserConfigurationException, SAXException {
+   // No upper bound on the argument count: valid option combinations can
+   // easily exceed a fixed limit, and unknown options are rejected below.
+   if (args.length < 1) {
+     exitWithUsageMessage();
+   }
+   boolean verbose = false;
+   boolean recursive = false;
+   boolean taggerOnly = false;
+   boolean singleLineBreakMarksParagraph = false;
+   boolean apiFormat = false;
+   boolean listUnknown = false;
+   boolean applySuggestions = false;
+   boolean profile = false;
+   boolean bitext = false;
+   Language language = null;
+   Language motherTongue = null;
+   String encoding = null;
+   String filename = null;
+   String[] disabledRules = new String[0];
+   String[] enabledRules = new String[0];
+   for (int i = 0; i < args.length; i++) {
+     if (args[i].equals("-h") || args[i].equals("-help")
+         || args[i].equals("--help") || args[i].equals("--?")) {
+       exitWithUsageMessage();
+     } else if (args[i].equals("-v") || args[i].equals("--verbose")) {
+       verbose = true;
+     } else if (args[i].equals("-t") || args[i].equals("--taggeronly")) {
+       taggerOnly = true;
+       if (listUnknown) {
+         throw new IllegalArgumentException(
+             "You cannot list unknown words when tagging only.");
+       }
+       if (applySuggestions) {
+         throw new IllegalArgumentException(
+             "You cannot apply suggestions when tagging only.");
+       }
+     } else if (args[i].equals("-r") || args[i].equals("--recursive")) {
+       recursive = true;
+     } else if (args[i].equals("-b2") || args[i].equals("--bitext")) {
+       bitext = true;
+     } else if (args[i].equals("-d") || args[i].equals("--disable")) {
+       if (enabledRules.length > 0) {
+         throw new IllegalArgumentException(
+             "You cannot specify both enabled and disabled rules");
+       }
+       final String rules = optionValue(args, i++);
+       disabledRules = rules.split(",");
+     } else if (args[i].equals("-e") || args[i].equals("--enable")) {
+       if (disabledRules.length > 0) {
+         throw new IllegalArgumentException(
+             "You cannot specify both enabled and disabled rules");
+       }
+       final String rules = optionValue(args, i++);
+       enabledRules = rules.split(",");
+     } else if (args[i].equals("-l") || args[i].equals("--language")) {
+       language = getLanguageOrExit(optionValue(args, i++));
+     } else if (args[i].equals("-m") || args[i].equals("--mothertongue")) {
+       motherTongue = getLanguageOrExit(optionValue(args, i++));
+     } else if (args[i].equals("-c") || args[i].equals("--encoding")) {
+       encoding = optionValue(args, i++);
+     } else if (args[i].equals("-u") || args[i].equals("--list-unknown")) {
+       listUnknown = true;
+       if (taggerOnly) {
+         throw new IllegalArgumentException(
+             "You cannot list unknown words when tagging only.");
+       }
+     } else if (args[i].equals("-b")) {
+       singleLineBreakMarksParagraph = true;
+     } else if (args[i].equals("--api")) {
+       apiFormat = true;
+       if (applySuggestions) {
+         throw new IllegalArgumentException(
+             "API format makes no sense for automatic application of suggestions.");
+       }
+     } else if (args[i].equals("-a") || args[i].equals("--apply")) {
+       applySuggestions = true;
+       if (taggerOnly) {
+         throw new IllegalArgumentException(
+             "You cannot apply suggestions when tagging only.");
+       }
+       if (apiFormat) {
+         throw new IllegalArgumentException(
+             "API format makes no sense for automatic application of suggestions.");
+       }
+     } else if (args[i].equals("-p") || args[i].equals("--profile")) {
+       profile = true;
+       if (apiFormat) {
+         throw new IllegalArgumentException(
+             "API format makes no sense for profiling.");
+       }
+       if (applySuggestions) {
+         throw new IllegalArgumentException(
+             "Applying suggestions makes no sense for profiling.");
+       }
+       if (taggerOnly) {
+         throw new IllegalArgumentException(
+             "Tagging makes no sense for profiling.");
+       }
+     } else if (i == args.length - 1) {
+       // anything else is only accepted in last position, as the file name
+       filename = args[i];
+     } else {
+       System.err.println("Unknown option: " + args[i]);
+       exitWithUsageMessage();
+     }
+   }
+   if (filename == null) {
+     filename = "-";
+   }
+   if (language == null) {
+     if (!apiFormat) {
+       System.err.println("No language specified, using English");
+     }
+     language = Language.ENGLISH;
+   } else if (!apiFormat && !applySuggestions) {
+     System.out.println("Expected text language: " + language.getName());
+   }
+   language.getSentenceTokenizer().setSingleLineBreaksMarksParagraph(
+       singleLineBreakMarksParagraph);
+   final Main prg = new Main(verbose, taggerOnly, language, motherTongue,
+       disabledRules, enabledRules, apiFormat, applySuggestions);
+   prg.setListUnknownWords(listUnknown);
+   if (profile) {
+     prg.setProfilingMode();
+   }
+   if (bitext) {
+     if (motherTongue == null) {
+       throw new IllegalArgumentException(
+           "You have to set the source language (as mother tongue).");
+     }
+     prg.setBitextMode(motherTongue, disabledRules, enabledRules);
+   }
+   if (recursive) {
+     prg.runRecursive(filename, encoding, listUnknown);
+   } else {
+     prg.runOnFile(filename, encoding, listUnknown);
+   }
+ }
+
+ /**
+  * Returns the value that follows the option at <code>optionIndex</code>;
+  * prints the usage message and exits if the option is the last argument.
+  */
+ private static String optionValue(final String[] args, final int optionIndex) {
+   if (optionIndex + 1 >= args.length) {
+     System.err.println("Missing value for option: " + args[optionIndex]);
+     exitWithUsageMessage();
+   }
+   return args[optionIndex + 1];
+ }
+
+ /**
+  * Looks up the language with the given short name. If there is none, prints
+  * the short names of all supported languages followed by the usage message
+  * and exits.
+  */
+ private static Language getLanguageOrExit(final String lang) {
+   final List<String> supportedLanguages = new ArrayList<String>();
+   Language match = null;
+   for (final Language candidate : Language.LANGUAGES) {
+     final String code = candidate.getShortName();
+     supportedLanguages.add(code);
+     if (lang.equals(code)) {
+       match = candidate;
+       break;
+     }
+   }
+   if (match == null) {
+     System.out.println("Unknown language '" + lang
+         + "'. Supported languages are: " + supportedLanguages);
+     exitWithUsageMessage();
+   }
+   return match;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle.properties
new file mode 100644
index 0000000..921a510
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle.properties
@@ -0,0 +1,186 @@
+# English translation of LanguageTool
+# Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de), Marcin Milkowski
+
+be = Belarusian
+
+ca = Catalan
+
+category_case = Capitalization
+
+category_false_friend = False friends
+
+category_grammar = Grammar
+
+category_misc = Miscellaneous
+
+category_typo = Possible Typo
+
+checkDone = Check done, {0} potential problems found
+
+checkText = &Check Text
+
+correctionMessage = Correction:
+
+cs = Czech
+
+da = Danish
+
+de = German
+
+desc_comma_whitespace = Use of whitespace before comma and before/after parentheses
+
+desc_double_punct = Use of two consecutive dots or commas
+
+desc_repetition = Word repetition (e.g. 'will will')
+
+desc_repetition_short = Word repetition
+
+desc_unpaired_brackets = Unpaired braces, brackets, quotation marks and similar symbols
+
+desc_uppercase_sentence = Checks that a sentence starts with an uppercase letter
+
+desc_whitespacerepetition = Whitespace repetition (bad formatting)
+
+double_dots_short = Two consecutive dots
+
+double_commas_short = Two consecutive commas
+
+en = English
+
+enterText = Please type or paste text to check in the top area
+
+enterText2 = Please insert text to check here
+
+eo = Esperanto
+
+errorContext = Context:
+
+errorMessage = Message:
+
+es = Spanish
+
+false_friend = False friend
+
+false_friend_desc = false friend hint for:
+
+false_friend_hint = Hint: "{0}" ({1}) means {2} ({3}).
+
+false_friend_suggestion = Did you mean {0}?
+
+fr = French
+
+gl = Galician
+
+guiCancelButton = Cancel
+
+guiCheckComplete = LanguageTool check is complete.
+
+guiConfigWindowTitle = LanguageTool Options
+
+guiDemoText = This is a example input to to show you how LanguageTool works. Note, however, that it does not include a spell checka.
+
+guiMatchCount = Potential errors:
+
+guiMenuAbout = &About...
+
+guiMenuAddRules = Load &Rule File
+
+guiMenuCheckClipboard = &Check Text in Clipboard
+
+guiMenuFile = &File
+
+guiMenuHelp = &Help
+
+guiMenuHide = &Hide to System Tray
+
+guiMenuOpen = &Open...
+
+guiMenuOptions = Option&s...
+
+guiMenuQuit = &Quit
+
+guiMenuShowMainWindow = Open Main Window
+
+guiMotherTongue = Your mother tongue:
+
+guiNoErrorsFound = No errors or warnings found (language: {0})
+
+guiNoErrorsFoundSelectedText = No errors or warnings found in selected text (language: {0})
+
+guiOKButton = &OK
+
+guiOOoChangeButton = &Change
+
+guiOOoCloseButton = Close
+
+guiOOoIgnoreAllButton = Ignore All
+
+guiOOoIgnoreButton = Ignore
+
+guiOOoOptionsButton = Options...
+
+guiProgressWindowTitle = LanguageTool: Checking Text...
+
+guiReplaceWindowTitle = Replace text
+
+guiReplaceWithOtherText = <other text>
+
+guiRunOnPort = Run as server on po&rt
+
+guiSelectionCheckComplete = LanguageTool check of selected text is complete.
+
+incorrect_case = This sentence does not start with an uppercase letter
+
+is = Icelandic
+
+it = Italian
+
+lt = Lithuanian
+
+missing_space_after_comma = Put a space after the comma
+
+ml= Malayalam
+
+nl = Dutch
+
+no_space_after = Don't put a space after the opening parenthesis
+
+no_space_before = Don't put a space before the closing parenthesis
+
+no_space_before_dot = Don't put a space before the full stop
+
+pl = Polish
+
+repetition = Possible typo: you repeated a word
+
+result1 = <br><b> {0}. Line {1}, column {2}</b><br>
+
+resultAreaText = Results will appear here
+
+resultTime = <br>Time: {0}ms (including {1}ms for rule matching)<br>
+
+ru = Russian
+
+sk = Slovak
+
+sl = Slovenian
+
+space_after_comma = Put a space after the comma, but not before the comma
+
+startChecking = Starting check in {0}
+
+sv = Swedish
+
+textLanguage = Text Language:
+
+two_commas = Two consecutive commas
+
+two_dots = Two consecutive dots
+
+uk = Ukrainian
+
+unpaired_brackets = Unpaired bracket or similar symbol
+
+whitespace_repetition = Possible typo: you repeated a whitespace
+
+ro = Romanian
\ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_be.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_be.properties
new file mode 100644
index 0000000..9a7212b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_be.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=\u0411\u0435\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f
+ca=\u041a\u0430\u0442\u0430\u043b\u043e\u043d\u0441\u043a\u0430\u044f
+category_case=\u0412\u044f\u043b\u0456\u043a\u0456\u044f \u043b\u0456\u0442\u0430\u0440\u044b
+category_false_friend=\u0410\u043c\u043e\u043d\u0456\u043c\u044b
+category_grammar=\u0413\u0440\u0430\u043c\u0430\u0442\u044b\u043a\u0430
+category_misc=\u0420\u043e\u0437\u043d\u0430\u0435
+category_typo=\u041c\u0430\u0433\u0447\u044b\u043c\u044b\u044f \u043f\u0430\u043c\u044b\u043b\u043a\u0456 \u043d\u0430\u0431\u043e\u0440\u0443
+checkDone=\u0421\u043f\u0440\u0430\u045e\u0434\u0436\u0430\u043d\u043d\u0435 \u0437\u0430\u0432\u0435\u0440\u0448\u0430\u043d\u0430, \u0437\u043d\u043e\u0439\u0434\u0437\u0435\u043d\u0430 {0} \u043c\u0430\u0433\u0447\u044b\u043c\u044b\u0445 \u043f\u0430\u043c\u044b\u043b\u0430\u043a
+checkText=\u0421\u043f\u0440\u0430\u045e\u0434\u0437\u0456\u0446\u044c \u0442\u044d\u043a\u0441\u0442
+correctionMessage=\u0412\u044b\u043f\u0440\u0430\u045e\u043b\u0435\u043d\u043d\u0456\:
+cs=\u0427\u044d\u0448\u0441\u043a\u0430\u044f
+da=\u0414\u0430\u0446\u043a\u0430\u044f
+de=\u041d\u044f\u043c\u0435\u0446\u043a\u0430\u044f
+desc_comma_whitespace=\u0412\u044b\u043a\u0430\u0440\u044b\u0441\u0442\u0430\u043d\u043d\u0435 \u043f\u0440\u0430\u0431\u0435\u043b\u0443 \u043f\u0435\u0440\u0430\u0434 \u043a\u043e\u0441\u043a\u0430\u0439 \u0446\u0456 \u0434\u0430 \u0456 \u043f\u0430\u0441\u043b\u044f \u0434\u0443\u0436\u043a\u0456
+desc_double_punct=\u0412\u044b\u043a\u0430\u0440\u044b\u0441\u0442\u0430\u043d\u043d\u0435 \u0434\u0432\u0443\u0445 \u043f\u0430\u0441\u043b\u044f\u0434\u043e\u045e\u043d\u044b\u0445 \u043a\u0440\u043e\u043f\u0430\u043a \u0456 \u043a\u043e\u0441\u043a\u0430\u0445
+desc_repetition=\u041f\u0430\u045e\u0442\u0430\u0440\u044d\u043d\u043d\u0435 \u0441\u043b\u043e\u0432\u0430\u045e (\u043d\u0430\u043f\u0440\u044b\u043a\u043b\u0430\u0434, "\u0431\u0443\u0434\u0443 \u0431\u0443\u0434\u0443")
+desc_repetition_short=\u041f\u0430\u045e\u0442\u0430\u0440\u044d\u043d\u043d\u0435 \u0441\u043b\u043e\u0432\u0430\u045e
+desc_unpaired_brackets=\u041d\u044f\u043f\u0430\u0440\u043d\u044b\u044f \u0434\u0443\u0436\u043a\u0456, \u0434\u0432\u0443\u043a\u043e\u0441\u0441\u0456 \u0446\u0456 \u043f\u0430\u0434\u043e\u0431\u043d\u044b\u044f \u0441\u0456\u043c\u0432\u0430\u043b\u044b
+desc_uppercase_sentence=\u041f\u0440\u0430\u0432\u044f\u0440\u0430\u0435, \u0448\u0442\u043e \u0441\u043a\u0430\u0437 \u043f\u0430\u0447\u044b\u043d\u0430\u0435\u0446\u0446\u0430 \u0437 \u0432\u044f\u043b\u0456\u043a\u0430\u0439 \u043b\u0456\u0442\u0430\u0440\u044b
+desc_whitespacerepetition=\u041f\u0430\u045e\u0442\u0430\u0440\u044d\u043d\u043d\u0435 \u043f\u0440\u0430\u0431\u0435\u043b\u0430\u045e (\u0434\u0440\u044d\u043d\u043d\u0430\u0435 \u0444\u0430\u0440\u043c\u0430\u0442\u0430\u0432\u0430\u043d\u043d\u0435)
+double_dots_short=\u0414\u0437\u0432\u0435 \u043f\u0430\u0441\u043b\u044f\u0434\u043e\u045e\u043d\u044b\u044f \u043a\u0440\u043e\u043f\u043a\u0456
+double_commas_short=\u0414\u0437\u0432\u0435 \u043f\u0430\u0441\u043b\u044f\u0434\u043e\u045e\u043d\u044b\u044f \u043a\u043e\u0441\u043a\u0456
+en=\u0410\u043d\u0433\u043b\u0456\u0439\u0441\u043a\u0430\u044f
+enterText=\u041a\u0430\u0431 \u0441\u043f\u0440\u0430\u045e\u0434\u0437\u0456\u0446\u044c, \u043d\u0430\u0431\u044f\u0440\u044b\u0446\u0435 \u0446\u0456 \u045e\u0441\u0442\u0430\u045e\u0446\u0435 \u0442\u044d\u043a\u0441\u0442 \u0443\u0432\u0435\u0440\u0441\u0435
+enterText2=\u041a\u0430\u043b\u0456 \u043b\u0430\u0441\u043a\u0430, \u045e\u0441\u0442\u0430\u045e\u0446\u0435 \u0442\u044d\u043a\u0441\u0442 \u043a\u0430\u0431 \u0441\u043f\u0440\u0430\u045e\u0434\u0437\u0456\u0446\u044c
+errorContext=\u041a\u0430\u043d\u0442\u044d\u043a\u0441\u0442\:
+errorMessage=\u041f\u0430\u0432\u0435\u0434\u0430\u043c\u043b\u0435\u043d\u043d\u0435\:
+es=\u0406\u0441\u043f\u0430\u043d\u0441\u043a\u0430\u044f
+false_friend=\u0410\u043c\u043e\u043d\u0456\u043c
+false_friend_desc=\u043f\u0430\u0434\u043a\u0430\u0437\u043a\u0430 \u0430\u043c\u043e\u043d\u0456\u043c\u0430 \u0434\u043b\u044f\:
+false_friend_hint=\u041f\u0430\u0434\u043a\u0430\u0437\u043a\u0430\: "{0}" ({1}) \u0430\u0437\u043d\u0430\u0447\u0430\u0435 {2} ({3}).
+false_friend_suggestion=\u0412\u044b \u043c\u0435\u043b\u0456 \u043d\u0430 \u045e\u0432\u0430\u0437\u0435 {0} ?
+fr=\u0424\u0440\u0430\u043d\u0446\u0443\u0437\u0441\u043a\u0430\u044f
+gl=\u0413\u0430\u043b\u0456\u0441\u0456\u0439\u0441\u043a\u0430\u044f
+guiCancelButton=\u0421\u043a\u0430\u0441\u0430\u0432\u0430\u0446\u044c
+guiCheckComplete=LanguageTool \u0437\u0430\u0432\u044f\u0440\u0448\u044b\u045e \u0441\u043f\u0440\u0430\u045e\u0434\u0436\u0432\u0430\u043d\u043d\u0435.
+guiConfigWindowTitle=\u041d\u0430\u0441\u0442\u0430\u045e\u043b\u0435\u043d\u043d\u0456 LanguageTool
+guiDemoText=\u0413\u044d\u0442\u0430 \u043f\u0440\u044b\u043a\u043b\u0430\u0434 \u0442\u044d\u043a\u0441\u0442\u0443 \u043a\u0430\u0431 \u043f\u0430\u043a\u0430\u0437\u0430\u0446\u044c \u0432\u0430\u043c, \u044f\u043a \u043f\u0440\u0430\u0446\u0443\u0435 LanguageTool. \u041c\u0430\u0439\u0446\u0435 \u043d\u0430 \u045e\u0432\u0430\u0437\u0435, \u0430\u0434\u043d\u0430\u043a, \u0448\u0442\u043e \u044f\u043d\u043e \u043d\u0435 \u045e\u043a\u043b\u044e\u0447\u0430\u0435 \u0441\u043f\u0440\u0430\u045e\u0434\u0436\u0432\u0430\u043d\u043d\u0435 \u0430\u0440\u0444\u0430\u0433\u0440\u0430\u0444\u0456\u0456.
+guiMatchCount=\u041c\u0430\u0433\u0447\u044b\u043c\u044b\u044f \u043f\u0430\u043c\u044b\u043b\u043a\u0456\:
+guiMenuAbout=\u0410\u0431 \u043f\u0440\u0430\u0433\u0440\u0430\u043c\u0435 ...
+guiMenuAddRules=\u0417\u0430\u0433\u0440\u0443\u0437\u0456\u0446\u044c \u0444\u0430\u0439\u043b \u043f\u0440\u0430\u0432\u0456\u043b\u0430\u045e
+guiMenuCheckClipboard=\u0421\u043f\u0440\u0430\u045e\u0434\u0437\u0456\u0446\u044c \u0442\u044d\u043a\u0441\u0442 \u0443 \u0430\u0431\u043c\u0435\u043d\u043d\u0456\u043a\u0443
+guiMenuFile=\u0424\u0430\u0439\u043b
+guiMenuHelp=\u0414\u0430\u0432\u0435\u0434\u043a\u0430
+guiMenuHide=\u0421\u0445\u0430\u0432\u0430\u0446\u044c \u045e \u0441\u0456\u0441\u0442\u044d\u043c\u043d\u044b \u0442\u0440\u044d\u0439
+guiMenuOpen=\u0410\u0434\u043a\u0440\u044b\u0446\u044c
+guiMenuOptions=\u041d\u0430\u0441\u0442\u0430\u045e\u043b\u0435\u043d\u043d\u0456
+guiMenuQuit=\u0412\u044b\u0439\u0441\u0446\u0456
+guiMenuShowMainWindow=\u0410\u0434\u043a\u0440\u044b\u0446\u044c \u0433\u0430\u043b\u043e\u045e\u043d\u0430\u0435 \u0430\u043a\u043d\u043e
+guiMotherTongue=\u0412\u0430\u0448\u0430 \u0440\u043e\u0434\u043d\u0430\u044f \u043c\u043e\u0432\u0430\:
+guiNoErrorsFound=\u041d\u044f \u0437\u043d\u043e\u0439\u0434\u0437\u0435\u043d\u0430 \u043f\u0430\u043c\u044b\u043b\u0430\u043a \u0430\u0431\u043e \u043f\u0430\u043f\u044f\u0440\u044d\u0434\u0436\u0430\u043d\u043d\u044f\u045e (\u043c\u043e\u0432\u0430\: {0})
+guiNoErrorsFoundSelectedText=\u041d\u044f \u0437\u043d\u043e\u0439\u0434\u0437\u0435\u043d\u0430 \u043f\u0430\u043c\u044b\u043b\u0430\u043a \u0430\u0431\u043e \u043f\u0430\u043f\u044f\u0440\u044d\u0434\u0436\u0430\u043d\u043d\u044f\u045e \u0443 \u0430\u0431\u0440\u0430\u043d\u044b\u043c \u0442\u044d\u043a\u0441\u0446\u0435 (\u043c\u043e\u0432\u0430\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=\u0417\u043c\u044f\u043d\u0456\u0446\u044c
+guiOOoCloseButton=\u0417\u0430\u043a\u0440\u044b\u0446\u044c
+guiOOoIgnoreAllButton=\u0406\u0433\u043d\u0430\u0440\u0430\u0432\u0430\u0446\u044c \u0443\u0441\u0435
+guiOOoIgnoreButton=\u0406\u0433\u043d\u0430\u0440\u0430\u0432\u0430\u0446\u044c
+guiOOoOptionsButton=\u041d\u0430\u0441\u0442\u0430\u045e\u043b\u0435\u043d\u043d\u0456...
+guiProgressWindowTitle=LanguageTool\: \u0441\u043f\u0440\u0430\u045e\u0434\u0436\u0432\u0430\u043d\u043d\u0435 \u0442\u044d\u043a\u0441\u0442\u0443 ...
+guiReplaceWindowTitle=\u0417\u0430\u043c\u044f\u043d\u0456\u0446\u044c \u0442\u044d\u043a\u0441\u0442\u0443
+guiReplaceWithOtherText=<\u0456\u043d\u0448\u044b \u0442\u044d\u043a\u0441\u0442>
+guiRunOnPort=\u0417\u0430\u043f\u0443\u0441\u0446\u0456\u0446\u044c \u044f\u043a \u0441\u044d\u0440\u0432\u0435\u0440 \u043d\u0430 \u043f\u043e\u0440\u0446\u0435
+guiSelectionCheckComplete=\u0421\u043f\u0440\u0430\u045e\u0434\u0436\u0432\u0430\u043d\u043d\u0435 LanguageTool \u0430\u0431\u0440\u0430\u043d\u0430\u0433\u0430 \u0442\u044d\u043a\u0441\u0442\u0443 \u0437\u0430\u0432\u0435\u0440\u0448\u0430\u043d\u0430.
+incorrect_case=\u0413\u044d\u0442\u044b \u0441\u043a\u0430\u0437 \u043d\u0435 \u043f\u0430\u0447\u044b\u043d\u0430\u0435\u0446\u0446\u0430 \u0437 \u0432\u044f\u043b\u0456\u043a\u0430\u0439 \u043b\u0456\u0442\u0430\u0440\u044b
+is=\u0406\u0441\u043b\u0430\u043d\u0434\u0441\u043a\u0430\u044f
+it=\u0406\u0442\u0430\u043b\u044c\u044f\u043d\u0441\u043a\u0430\u044f
+lt=\u041b\u0456\u0442\u043e\u045e\u0441\u043a\u0430\u044f
+missing_space_after_comma=\u041f\u0430\u0441\u0442\u0430\u0432\u0456\u0446\u044c \u043f\u0440\u0430\u0431\u0435\u043b \u043f\u0430\u0441\u043b\u044f \u043a\u043e\u0441\u043a\u0456
+ml=\u041c\u0430\u043b\u0430\u044f\u043b\u0430\u043c\u0441\u043a\u0430\u044f
+nl=\u0413\u0430\u043b\u0430\u043d\u0434\u0441\u043a\u0430\u044f
+no_space_after=\u041d\u0435 \u0441\u0442\u0430\u045e\u0446\u0435 \u043f\u0440\u0430\u0431\u0435\u043b \u043f\u0430\u0441\u043b\u044f \u043b\u0435\u0432\u0430\u0439 \u0434\u0443\u0436\u043a\u0456
+no_space_before=\u041d\u0435 \u0441\u0442\u0430\u045e\u0446\u0435 \u043f\u0440\u0430\u0431\u0435\u043b \u043f\u0435\u0440\u0430\u0434 \u043f\u0440\u0430\u0432\u0430\u0439 \u0434\u0443\u0436\u043a\u0456
+no_space_before_dot=\u041d\u0435 \u043f\u0430\u043a\u0456\u0434\u0430\u0439\u0446\u0435 \u043f\u0440\u0430\u0431\u0435\u043b \u043f\u0435\u0440\u0430\u0434 \u043a\u0440\u043e\u043f\u043a\u0430\u0439
+pl=\u041f\u043e\u043b\u044c\u0441\u043a\u0430\u044f
+repetition=\u041c\u0430\u0433\u0447\u044b\u043c\u0430\u044f \u043f\u0430\u043c\u044b\u043b\u043a\u0430 \u043d\u0430\u0431\u043e\u0440\u0443\: \u0432\u044b \u043f\u0430\u045e\u0442\u0430\u0440\u044b\u043b\u0456 \u0441\u043b\u043e\u0432\u0430
+result1=<br><b> {0}. \u0420\u0430\u0434\u043e\u043a {1}, \u043a\u0430\u043b\u043e\u043d\u043a\u0430 {2}</b><br>
+resultAreaText=\u0412\u044b\u043d\u0456\u043a\u0456 \u0437'\u044f\u0432\u044f\u0446\u0446\u0430 \u0442\u0443\u0442
+resultTime=<br>\u0427\u0430\u0441\: {0}\u043c\u0441 (\u0443\u043a\u043b\u044e\u0447\u0430\u044f {1}\u043c\u0441 \u043d\u0430 \u0441\u043f\u0440\u0430\u045e\u0434\u0436\u0432\u0430\u043d\u043d\u0435 \u043f\u0440\u0430\u0432\u0456\u043b\u0430\u045e)<br>
+ru=\u0420\u0443\u0441\u043a\u0430\u044f
+sk=\u0421\u043b\u0430\u0432\u0430\u0446\u043a\u0430\u044f
+sl=\u0421\u043b\u0430\u0432\u0435\u043d\u0441\u043a\u0430\u044f
+space_after_comma=\u041f\u0430\u0441\u0442\u0430\u045e\u0446\u0435 \u043f\u0440\u0430\u0431\u0435\u043b \u043f\u0430\u0441\u043b\u044f \u043a\u043e\u0441\u043a\u0456, \u0430\u043b\u0435 \u043d\u0435 \u043f\u0435\u0440\u0430\u0434 \u043a\u043e\u0441\u043a\u0430\u0439
+startChecking=\u0421\u043f\u0440\u0430\u045e\u0434\u0436\u0432\u0430\u043d\u043d\u0435 \u043d\u0430 \u043c\u043e\u0432\u0435\: {0}
+sv=\u0428\u0432\u0435\u0434\u0441\u043a\u0430\u044f
+textLanguage=\u041c\u043e\u0432\u0430 \u0442\u044d\u043a\u0441\u0442\u0443\:
+two_commas=\u0414\u0437\u0432\u0435 \u043f\u0430\u0441\u043b\u044f\u0434\u043e\u045e\u043d\u044b\u044f \u043a\u043e\u0441\u043a\u0456
+two_dots=\u0414\u0437\u0432\u0435 \u043f\u0430\u0441\u043b\u044f\u0434\u043e\u045e\u043d\u044b\u044f \u043a\u0440\u043e\u043f\u043a\u0456
+uk=\u0423\u043a\u0440\u0430\u0456\u043d\u0441\u043a\u0430\u044f
+unpaired_brackets=\u041d\u044f\u043f\u0430\u0440\u043d\u044b\u044f \u0441\u043a\u043e\u0431\u043a\u0456 \u0430\u0431\u043e \u043f\u0430\u0434\u043e\u0431\u043d\u044b\u044f \u0441\u0456\u043c\u0432\u0430\u043b\u044b
+whitespace_repetition=\u041c\u0430\u0433\u0447\u044b\u043c\u0430\u044f \u043f\u0430\u043c\u044b\u043b\u043a\u0430 \u043d\u0430\u0431\u043e\u0440\u0443\: \u0432\u044b \u043f\u0430\u045e\u0442\u0430\u0440\u044b\u043b\u0456 \u043f\u0440\u0430\u0431\u0435\u043b
+ro=\u0420\u0443\u043c\u044b\u043d\u0441\u043a\u0430\u044f
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ca.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ca.properties
new file mode 100644
index 0000000..ef81842
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ca.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Belarusian
+ca=Catalan
+category_case=Capitalization
+category_false_friend=False friends
+category_grammar=Grammar
+category_misc=Miscellaneous
+category_typo=Possible Typo
+checkDone=Check done, {0} potential problems found
+checkText=&Check Text
+correctionMessage=Correction\:
+cs=Czech
+da=Danish
+de=German
+desc_comma_whitespace=Use of whitespace before comma and before/after parentheses
+desc_double_punct=Use of two consecutive dots or commas
+desc_repetition=Word repetition (e.g. 'will will')
+desc_repetition_short=Word repetition
+desc_unpaired_brackets=Unpaired braces, brackets, quotation marks and similar symbols
+desc_uppercase_sentence=Checks that a sentence starts with an uppercase letter
+desc_whitespacerepetition=Whitespace repetition (bad formatting)
+double_dots_short=Two consecutive dots
+double_commas_short=Two consecutive commas
+en=English
+enterText=Please type or paste text to check in the top area
+enterText2=Please insert text to check here
+errorContext=Context\:
+errorMessage=Message\:
+es=Spanish
+false_friend=False friend
+false_friend_desc=false friend hint for\:
+false_friend_hint=Hint\: "{0}" ({1}) means {2} ({3}).
+false_friend_suggestion=Did you mean {0}?
+fr=French
+gl=Galician
+guiCancelButton=Cancel
+guiCheckComplete=LanguageTool check is complete.
+guiConfigWindowTitle=LanguageTool Options
+guiDemoText=This is a example input to to show you how LanguageTool works. Note, however, that it does not include a spell checka.
+guiMatchCount=Potential errors\:
+guiMenuAbout=&About...
+guiMenuAddRules=Load &Rule File
+guiMenuCheckClipboard=&Check Text in Clipboard
+guiMenuFile=&File
+guiMenuHelp=&Help
+guiMenuHide=&Hide to System Tray
+guiMenuOpen=&Open...
+guiMenuOptions=Option&s...
+guiMenuQuit=&Quit
+guiMenuShowMainWindow=Open Main Window
+guiMotherTongue=Your mother tongue\:
+guiNoErrorsFound=No errors or warnings found (language\: {0})
+guiNoErrorsFoundSelectedText=No errors or warnings found in selected text (language\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=&Change
+guiOOoCloseButton=Close
+guiOOoIgnoreAllButton=Ignore All
+guiOOoIgnoreButton=Ignore
+guiOOoOptionsButton=Options...
+guiProgressWindowTitle=LanguageTool\: Checking Text...
+guiReplaceWindowTitle=Replace text
+guiReplaceWithOtherText=<other text>
+guiRunOnPort=Run as server on po&rt
+guiSelectionCheckComplete=LanguageTool check of selected text is complete.
+incorrect_case=This sentence does not start with an uppercase letter
+is=Icelandic
+it=Italian
+lt=Lithuanian
+missing_space_after_comma=Put a space after the comma
+ml=Malayalam
+nl=Dutch
+no_space_after=Don't put a space after the opening parenthesis
+no_space_before=Don't put a space before the closing parenthesis
+no_space_before_dot=Don't put a space before the full stop
+pl=Polish
+repetition=Possible typo\: you repeated a word
+result1=<br><b> {0}. Line {1}, column {2}</b><br>
+resultAreaText=Results will appear here
+resultTime=<br>Time\: {0}ms (including {1}ms for rule matching)<br>
+ru=Russian
+sk=Slovak
+sl=Slovenian
+space_after_comma=Put a space after the comma, but not before the comma
+startChecking=Starting check in {0}
+sv=Swedish
+textLanguage=Text Language\:
+two_commas=Two consecutive commas
+two_dots=Two consecutive dots
+uk=Ukrainian
+unpaired_brackets=Unpaired bracket or similar symbol
+whitespace_repetition=Possible typo\: you repeated a whitespace
+ro=Romanian
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_cs.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_cs.properties
new file mode 100644
index 0000000..6484c20
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_cs.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=B\u011blorusky
+ca=Katal\u00e1nsky
+category_case=Kapitalizace
+category_false_friend=Fale\u0161n\u00ed p\u0159\u00e1tel\u00e9
+category_grammar=Gramatika
+category_misc=R\u016fzn\u00e9
+category_typo=Mo\u017en\u00fd p\u0159eklep
+checkDone=Kontrola ukon\u010dena, {0} nalezen\u00fdch potenci\u00e1ln\u00edch chyb.
+checkText=Prov\u00e9st kontrolu
+correctionMessage=Oprava\:
+cs=\u010cesky
+da=D\u00e1nsky
+de=N\u011bmecky
+desc_comma_whitespace=Pou\u017eit\u00ed mezery p\u0159ed \u010d\u00e1rkou a p\u0159ed/za z\u00e1vorkami
+desc_double_punct=Pou\u017eit\u00ed dvou za sebou jdouc\u00edch \u010d\u00e1rek nebo te\u010dek
+desc_repetition=Opakov\u00e1n\u00ed slov (nap\u0159. 'bude bude')
+desc_repetition_short=Opakov\u00e1n\u00ed slov
+desc_unpaired_brackets=Nesp\u00e1rovan\u00e9 z\u00e1vorky, uvozovky nebo podobn\u00e9 symboly
+desc_uppercase_sentence=Zkontroluje zda v\u011bta za\u010d\u00edn\u00e1 velk\u00fdm po\u010d\u00e1te\u010dn\u00edm p\u00edsmenem
+desc_whitespacerepetition=Opakov\u00e1n\u00ed mezery(\u0161patn\u00e9 form\u00e1tov\u00e1n\u00ed)
+double_dots_short=Dv\u011b za sebou jdouc\u00ed te\u010dky
+double_commas_short=Dv\u011b za sebou jdouc\u00ed \u010d\u00e1rky
+en=Anglicky
+enterText=Pros\u00edm zadejte nebo vlo\u017ete text, kter\u00fd chcete zkontrolovat, do vrchn\u00ed oblasti
+enterText2=Zde zadejte po\u017eadovan\u00fd text
+errorContext=Kontext\:
+errorMessage=Zpr\u00e1va\:
+es=\u0160pan\u011blsky
+false_friend=Fale\u0161n\u00fd p\u0159\u00edtel
+false_friend_desc=\u0161patn\u00fd p\u0159itel, n\u00e1pov\u011bda\:
+false_friend_hint=N\u00e1pov\u011bda\: "{0}" ({1}) znamen\u00e1 {2} ({3}).
+false_friend_suggestion=Mysleli jste {0}?
+fr=Francouzsky
+gl=Galicij\u0161tinsky
+guiCancelButton=Storno
+guiCheckComplete=LanguageTool kontrola byla ukon\u010dena
+guiConfigWindowTitle=LanguageTool nastaven\u00ed
+guiDemoText=Tohle je uk\u00e1zkov\u00fd vstup, aby p\u0159edvedl jak LanguageTool funguje. Pamatujte, pros\u00edm, \u017ee neobsahuje kontrolu pravopisu (spellchecker).
+guiMatchCount=Potenci\u00e1ln\u00ed chyby\:
+guiMenuAbout=O Aplikaci...
+guiMenuAddRules=Load Rule File
+guiMenuCheckClipboard=Zkontrolovat text ve zchr\u00e1nce
+guiMenuFile=Soubor
+guiMenuHelp=N\u00e1pov\u011bda
+guiMenuHide=Schovat do syst\u00e9move li\u0161ty
+guiMenuOpen=Otev\u0159\u00edt...
+guiMenuOptions=Nastaven\u00ed...
+guiMenuQuit=Konec
+guiMenuShowMainWindow=Open main window
+guiMotherTongue=V\u00e1\u0161 mate\u0159sk\u00fd jazyk\:
+guiNoErrorsFound=\u017d\u00e1dn\u00e9 chyby ani varov\u00e1n\u00ed nebyly nalezeny (jazyk\: {0})
+guiNoErrorsFoundSelectedText=\u017d\u00e1dn\u00e9 chyby ani varov\u00e1n\u00ed nebyly vo vyzna\u010den\u00e9m textu nalezeny (jazyk\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=Zam\u011bnit
+guiOOoCloseButton=Zav\u0159\u00edt
+guiOOoIgnoreAllButton=Ignorovat v\u0161e
+guiOOoIgnoreButton=Ignorovat
+guiOOoOptionsButton=Nastaven\u00ed...
+guiProgressWindowTitle=LanguageTool\: Kontrola textu...
+guiReplaceWindowTitle=Zam\u011bnit text
+guiReplaceWithOtherText=<jin\u00fd text>
+guiRunOnPort=Pob\u011b\u017e\u00ed jako server na portu
+guiSelectionCheckComplete=LanguageTool kontrola ozna\u010den\u00e9ho textu je ukon\u010dena.
+incorrect_case=Tato v\u011bta neza\u010d\u00edn\u00e1 z velk\u00fdm p\u00edsmenem
+is=Islandsky
+it=Italsky
+lt=Litevsky
+missing_space_after_comma=Vlo\u017eit mezeru za \u010d\u00e1rku
+ml=Malaj\u00e1lamsky
+nl=Nizozemsky
+no_space_after=Nevkl\u00e1dat mezeru za otev\u00edrac\u00ed z\u00e1vorku
+no_space_before=Nevkl\u00e1dat mezeru p\u0159ed uzav\u00edrac\u00ed z\u00e1vorku
+no_space_before_dot=Nevkl\u00e1dejte mezeru p\u0159ed te\u010dku
+pl=Polsky
+repetition=Mo\u017en\u00fd p\u0159eklep\: zopakovali jste slovo
+result1=<br><b> {0}. \u0158\u00e1dek {1}, sloupec {2}</b><br>
+resultAreaText=V\u00fdsledek se zobraz\u00ed zde
+resultTime=<br>\u010cas\: {0}ms (v\u010detn\u011b {1}ms na pou\u017eit\u00ed pravidel)<br>
+ru=Rusky
+sk=Slovensky
+sl=Slovinsky
+space_after_comma=Vlo\u017eit mezeru za \u010d\u00e1rku, ale ne p\u0159ed \u010d\u00e1rku
+startChecking=Za\u010d\u00e1tek kontroly v {0}
+sv=\u0160v\u00e9dsky
+textLanguage=Jazyk textu\:
+two_commas=Dv\u011b po sebe jdouc\u00ed \u010d\u00e1rky
+two_dots=Dv\u011b po sebe jdouc\u00ed te\u010dky
+uk=Ukrajinsky
+unpaired_brackets=Nesp\u00e1rovan\u00e9 z\u00e1vorky nebo podobn\u00fd symbol
+whitespace_repetition=Mo\u017en\u00fd p\u0159eklep\: zopakovali jste mezeru
+ro=Rumunsky
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_da.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_da.properties
new file mode 100644
index 0000000..419d33d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_da.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Hviderussisk
+ca=Catalansk
+category_case=STORE/sm\u00e5 bogstaver
+category_false_friend=Falske venner
+category_grammar=Grammatik
+category_misc=Diverse
+category_typo=Mulig sl\u00e5fejl
+checkDone=Kontrol gennemf\u00f8rt, {0} mulige problemer fundet
+checkText=&Kontrollere tekst
+correctionMessage=Korrektion\:
+cs=Tjekkisk
+da=Dansk
+de=Tysk
+desc_comma_whitespace=Mellemrum f\u00f8r komma og f\u00f8r/efter parenteser
+desc_double_punct=To p\u00e5 hinanden f\u00f8lgende punktummer eller kommaer
+desc_repetition=Ordgentagelse (f.eks. 'den den')
+desc_repetition_short=Ordgentagelse
+desc_unpaired_brackets=Ikke parret parenteser, tuborgklammer, citationstegn og lignende symboler
+desc_uppercase_sentence=Kontroller at s\u00e6tningen starter med et stort begyndelsesbogstav
+desc_whitespacerepetition=Gentagende mellemrum (d\u00e5rlig formatering)
+double_dots_short=To p\u00e5 hinanden f\u00f8lgende punktummer
+double_commas_short=To p\u00e5 hinanden f\u00f8lgende kommaer
+en=Engelsk
+enterText=Indtast eller inds\u00e6t teksten der skal kontrolleres i det \u00f8verste felt
+enterText2=Inds\u00e6t teksten der skal kontrolleres her
+errorContext=Sammenh\u00e6ng\:
+errorMessage=Meddelelse\:
+es=Spansk
+false_friend=Falske venner
+false_friend_desc=Tip om falske venner\:
+false_friend_hint=Tip\: "{0}" ({1}) betyder {2} ({3}).
+false_friend_suggestion=Mente du {0}?
+fr=Fransk
+gl=Galicisk
+guiCancelButton=Annuller
+guiCheckComplete=LanguageTools kontrol er f\u00e6rdig.
+guiConfigWindowTitle=LanguageTool Indstillinger
+guiDemoText=Dette er et teksteksempel for at at vise hvordan LanguageTool virker. Bem\u00e6rk dog, at den ikke indeholder en stavekontrol.
+guiMatchCount=Mulig fejl\:
+guiMenuAbout=&Om LanguageTool
+guiMenuAddRules=Hent &regelfil
+guiMenuCheckClipboard=&Kontroller tekst i klipholderen
+guiMenuFile=&Filer
+guiMenuHelp=&Hj\u00e6lp
+guiMenuHide=&Skjul til systembakken
+guiMenuOpen=&\u00c5ben...
+guiMenuOptions=&Indstillinger...
+guiMenuQuit=&Afslut
+guiMenuShowMainWindow=\u00c5ben hovedvinduet
+guiMotherTongue=Dit modersm\u00e5l\:
+guiNoErrorsFound=Ingen fejl eller advarsler fundet (sprog\: {0})
+guiNoErrorsFoundSelectedText=Ingen fejl eller advarsler fundet i den markerede tekst (sprog\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=&Change
+guiOOoCloseButton=&Luk
+guiOOoIgnoreAllButton=I&gnorer alle
+guiOOoIgnoreButton=&Ignorer her
+guiOOoOptionsButton=In&dstillinger...
+guiProgressWindowTitle=LanguageTool\: Kontrollere tekst...
+guiReplaceWindowTitle=Erstat tekst
+guiReplaceWithOtherText=<anden tekst>
+guiRunOnPort=K\u00f8r som service p\u00e5 po&rt
+guiSelectionCheckComplete=LanguageTools kontrol af markerede tekst er f\u00e6rdig.
+incorrect_case=Denne s\u00e6tning starter ikke med et stort begyndelsesbogstav
+is=Islandsk
+it=Italiensk
+lt=Litauisk
+missing_space_after_comma=Inds\u00e6t et mellemrum efter kommaet
+ml=Malayalam
+nl=Hollandsk
+no_space_after=Inds\u00e6t ikke et mellemrum efter parentesbegynd
+no_space_before=Inds\u00e6t ikke et mellemrum f\u00f8r parentesslut
+no_space_before_dot=Inds\u00e6t ikke et mellemrum f\u00f8r punktum
+pl=Polsk
+repetition=Mulig sl\u00e5fejl\: du har gentaget et ord
+result1=<br><b> {0}. Linje {1}, kolonne {2}</b><br>
+resultAreaText=Resultater vil vise sig her
+resultTime=<br>Tid\: {0}ms (inklusiv {1}ms til regelafpr\u00f8vning)<br>
+ru=Russisk
+sk=Slovakisk
+sl=Slovensk
+space_after_comma=Inds\u00e6t et mellemrum efter kommaet, ikke f\u00f8r det.
+startChecking=Starter kontrollen om {0}
+sv=Svensk
+textLanguage=Tekstens sprog\:
+two_commas=To p\u00e5 hinanden f\u00f8lgende kommaer
+two_dots=To p\u00e5 hinanden f\u00f8lgende punktummer
+uk=Ukrainsk
+unpaired_brackets=Ikke parret parenteser eller lignende symboler
+whitespace_repetition=Mulig sl\u00e5fejl\: du har gentaget et mellemrum
+ro=Rum\u00e6nsk
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_de.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_de.properties
new file mode 100644
index 0000000..ccffd28
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_de.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+be=Wei\u00dfrussisch
+ca=Katalanisch
+category_case=Gro\u00df-/Kleinschreibung
+category_false_friend=Falsche Freunde
+category_grammar=Grammatik
+category_misc=Sonstiges
+category_typo=M\u00f6gliche Tippfehler
+checkDone=Pr\u00fcfung beendet, {0} m\u00f6gliche Probleme gefunden
+checkText=Text &pr\u00fcfen
+correctionMessage=Korrektur\:
+cs=Tschechisch
+da=D\u00e4nisch
+de=Deutsch
+desc_comma_whitespace=Leerzeichen vor/hinter Kommas und Klammern
+desc_double_punct=Zwei aufeinander folgende Kommas oder Punkte
+desc_repetition=Wortwiederholung (z.B. 'als als')
+desc_repetition_short=Wortwiederholung
+desc_unpaired_brackets=Unpaarige Anf\u00fchrungszeichen und Klammern
+desc_uppercase_sentence=Gro\u00dfschreibung am Satzanfang
+desc_whitespacerepetition=Wiederholung von Leerzeichen
+double_dots_short=Zwei aufeinander folgende Punkte
+double_commas_short=Zwei aufeinander folgende Kommas
+en=Englisch
+eo=Esperanto
+enterText=Bitte Text ins obere Feld eintippen oder hineinkopieren
+enterText2=Bitte Text hier eintippen oder hineinkopieren
+errorContext=Text\:
+errorMessage=Hinweis\:
+es=Spanisch
+false_friend=Falscher Freund
+false_friend_desc=Falscher-Freund-Hinweis f\u00fcr\:
+false_friend_hint=Hinweis\: "{0}" ({1}) bedeutet {2} ({3}).
+false_friend_suggestion=Meinten Sie vielleicht {0}?
+fr=Franz\u00f6sisch
+gl=Galicisch
+guiCancelButton=Abbrechen
+guiCheckComplete=LanguageTool-Pr\u00fcfung beendet.
+guiConfigWindowTitle=LanguageTool Optionen
+guiDemoText=Dies ist ein Beispiel-Text, um zu zeigen zeigen, wie LanguageTool funktioniert. Wie man sieht, ist keine R\u00e4chtshreibpr\u00fcfung enthalten.
+guiMatchCount=M\u00f6gliche Fehler\:
+guiMenuAbout=\u00dcber...
+guiMenuAddRules=Regeldatei laden...
+guiMenuCheckClipboard=Text in der Zwischenablage pr\u00fcfen
+guiMenuFile=Datei
+guiMenuHelp=Hilfe
+guiMenuHide=In den System Tray verkleinern
+guiMenuOpen=\u00d6ffnen...
+guiMenuOptions=Optionen...
+guiMenuQuit=Beenden
+guiMenuShowMainWindow=Hauptfenster \u00f6ffnen
+guiMotherTongue=Ihre Muttersprache\:
+guiNoErrorsFound=Keine Fehler und Warnungen gefunden (Textsprache\: {0})
+guiNoErrorsFoundSelectedText=Keine Fehler und Warnungen im selektierten Text gefunden (Textsprache\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=\u00c4ndern
+guiOOoCloseButton=Schlie\u00dfen
+guiOOoIgnoreAllButton=Alle ignorieren
+guiOOoIgnoreButton=Ignorieren
+guiOOoOptionsButton=Optionen...
+guiProgressWindowTitle=LanguageTool\: Text pr\u00fcfen...
+guiReplaceWindowTitle=Text ersetzen
+guiReplaceWithOtherText=<anderer Text>
+guiRunOnPort=Als Server laufen auf Po&rt
+guiSelectionCheckComplete=LanguageTool-Pr\u00fcfung des selektierten Textes ist beendet.
+incorrect_case=Dieser Satz f\u00e4ngt nicht mit einem gro\u00df geschriebenen Wort an
+is=Isl\u00e4ndisch
+it=Italienisch
+lt=Litauisch
+missing_space_after_comma=Hinter einem Komma sollte ein Leerzeichen stehen.
+ml=Malayalam
+nl=Niederl\u00e4ndisch
+no_space_after=Hinter einer \u00f6ffnenden Klammer wird kein Leerzeichen eingef\u00fcgt.
+no_space_before=Vor einer schlie\u00dfenden Klammer wird kein Leerzeichen eingef\u00fcgt.
+no_space_before_dot=Vor dem Punkt sollte kein Leerzeichen stehen
+pl=Polnisch
+repetition=M\u00f6glicher Tippfehler\: ein Wort wird wiederholt
+result1=<br><b> {0}. Zeile {1}, Spalte {2}</b><br>
+resultAreaText=Hier erscheint das Ergebnis der Textpr\u00fcfung
+resultTime=<br>Zeit\: {0}ms (davon {1}ms f\u00fcr Regelpr\u00fcfungen)<br>
+ru=Russisch
+sk=Slowakisch
+sl=Slowenisch
+space_after_comma=Nur hinter einem Komma steht ein Leerzeichen, aber nicht davor.
+startChecking=Beginne Pr\u00fcfung in {0}
+sv=Schwedisch
+textLanguage=Textsprache\:
+two_commas=Zwei aufeinander folgende Kommas.
+two_dots=Zwei aufeinander folgende Punkte.
+uk=Ukrainisch
+unpaired_brackets=Es fehlt eine Klammer usw.
+whitespace_repetition=M\u00f6glicher Tippfehler\: mehr als ein Leerzeichen hintereinander
+ro=Rum\u00e4nisch
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_en.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_en.properties
new file mode 100644
index 0000000..5133e9a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_en.properties
@@ -0,0 +1,189 @@
+#Generated by ResourceBundle Editor (http://eclipse-rbe.sourceforge.net)
+# English translation of LanguageTool
+# Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de), Marcin Milkowski
+
+be = Belarusian
+
+ca = Catalan
+
+category_case = Capitalization
+
+category_false_friend = False friends
+
+category_grammar = Grammar
+
+category_misc = Miscellaneous
+
+category_typo = Possible Typo
+
+checkDone = Check done, {0} potential problems found
+
+checkText = &Check Text
+
+correctionMessage = Correction:
+
+cs = Czech
+
+da = Danish
+
+de = German
+
+nb = Norwegian (Bokm\u00e5l)
+
+desc_comma_whitespace = Use of whitespace before comma and before/after parentheses
+
+desc_double_punct = Use of two consecutive dots or commas
+
+desc_repetition = Word repetition (e.g. 'will will')
+
+desc_repetition_short = Word repetition
+
+desc_unpaired_brackets = Unpaired braces, brackets, quotation marks and similar symbols
+
+desc_uppercase_sentence = Checks that a sentence starts with an uppercase letter
+
+desc_whitespacerepetition = Whitespace repetition (bad formatting)
+
+double_dots_short = Two consecutive dots
+
+double_commas_short = Two consecutive commas
+
+en = English
+
+enterText = Please type or paste text to check in the top area
+
+enterText2 = Please insert text to check here
+
+eo = Esperanto
+
+errorContext = Context:
+
+errorMessage = Message:
+
+es = Spanish
+
+false_friend = False friend
+
+false_friend_desc = false friend hint for:
+
+false_friend_hint = Hint: "{0}" ({1}) means {2} ({3}).
+
+false_friend_suggestion = Did you mean {0}?
+
+fr = French
+
+gl = Galician
+
+guiCancelButton = Cancel
+
+guiCheckComplete = LanguageTool check is complete.
+
+guiConfigWindowTitle = LanguageTool Options
+
+guiDemoText = This is a example input to to show you how LanguageTool works. Note, however, that it does not include a spell checka.
+
+guiMatchCount = Potential errors:
+
+guiMenuAbout = &About...
+
+guiMenuAddRules = Load &Rule File
+
+guiMenuCheckClipboard = &Check Text in Clipboard
+
+guiMenuFile = &File
+
+guiMenuHelp = &Help
+
+guiMenuHide = &Hide to System Tray
+
+guiMenuOpen = &Open...
+
+guiMenuOptions = Option&s...
+
+guiMenuQuit = &Quit
+
+guiMenuShowMainWindow = Open Main Window
+
+guiMotherTongue = Your mother tongue:
+
+guiNoErrorsFound = No errors or warnings found (language: {0})
+
+guiNoErrorsFoundSelectedText = No errors or warnings found in selected text (language: {0})
+
+guiOKButton = &OK
+
+guiOOoChangeButton = &Change
+
+guiOOoCloseButton = Close
+
+guiOOoIgnoreAllButton = Ignore All
+
+guiOOoIgnoreButton = Ignore
+
+guiOOoOptionsButton = Options...
+
+guiProgressWindowTitle = LanguageTool: Checking Text...
+
+guiReplaceWindowTitle = Replace text
+
+guiReplaceWithOtherText = <other text>
+
+guiRunOnPort = Run as server on po&rt
+
+guiSelectionCheckComplete = LanguageTool check of selected text is complete.
+
+incorrect_case = This sentence does not start with an uppercase letter
+
+is = Icelandic
+
+it = Italian
+
+lt = Lithuanian
+
+missing_space_after_comma = Put a space after the comma
+
+ml = Malayalam
+
+nl = Dutch
+
+no_space_after = Don't put a space after the opening parenthesis
+
+no_space_before = Don't put a space before the closing parenthesis
+
+no_space_before_dot = Don't put a space before the full stop
+
+pl = Polish
+
+repetition = Possible typo: you repeated a word
+
+result1 = <br><b> {0}. Line {1}, column {2}</b><br>
+
+resultAreaText = Results will appear here
+
+resultTime = <br>Time: {0}ms (including {1}ms for rule matching)<br>
+
+ru = Russian
+
+sk = Slovak
+
+sl = Slovenian
+
+space_after_comma = Put a space after the comma, but not before the comma
+
+startChecking = Starting check in {0}
+
+sv = Swedish
+
+textLanguage = Text Language:
+
+two_commas = Two consecutive commas
+
+two_dots = Two consecutive dots
+
+uk = Ukrainian
+
+unpaired_brackets = Unpaired bracket or similar symbol
+
+whitespace_repetition = Possible typo: you repeated a whitespace
+
+ro = Romanian
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_eo.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_eo.properties
new file mode 100644
index 0000000..f962fa7
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_eo.properties
@@ -0,0 +1,198 @@
+# Esperanto translation of LanguageTool
+# Copyright (C) 2010 Daniel Naber (http://www.danielnaber.de), Marcin Milkowski
+#
+# Vim command to run before editing (to see accented letters rather
+# than their escape codes):
+# :v/^#/s:\\u0109:ĉ:gI |s:\\u0108:Ĉ:gI |s:\\u011d:ĝ:gI |s:\\u0135:ĵ:gI |s:\\u0125:ĥ:gI |s:\\u016d:ŭ:gI |s:\\u015c:Ŝ:gI |s:\\u015d:ŝ:gI
+#
+# Vim command to run before saving file:
+# :v/^#/s:ĉ:\\u0109:gI |s:Ĉ:\\u0108:gI |s:ĝ:\\u011d:gI |s:ĵ:\\u0135:gI |s:ĥ:\\u0125:gI |s:ŭ:\\u016d:gI |s:Ŝ:\\u015c:gI |s:ŝ:\\u015d:gI
+#
+# Sendu komentojn aŭ korektojn al:
+# Send comments or corrections to: Dominique Pellé <dominique.pelle@gmail.com>
+#
+#
+be = Belorusa
+
+ca = Kataluna
+
+category_case = Uskleco
+
+category_false_friend = Falsaj amikoj
+
+category_grammar = Gramatiko
+
+category_misc = Diversa\u0135oj
+
+category_typo = Ebla misliterumo
+
+checkDone = Kontrolo farita, {0} eblaj problemoj trovitaj
+
+checkText = &Kontroli tekston
+
+correctionMessage = Korektado:
+
+cs = \u0108e\u0125a
+
+da = Dana
+
+de = Germana
+
+desc_comma_whitespace = Uzo de spaceto anta\u016d komo kaj anta\u016d/malanta\u016d krampoj
+
+desc_double_punct = Uzo de sinsekvaj punktoj a\u016d komoj
+
+desc_repetition = Ripetita vorto (ekz. 'li li')
+
+desc_repetition_short = Ripetita vorto
+
+desc_unpaired_brackets = Nekongruaj krampoj, rektaj krampoj, citiloj kaj similaj signoj
+
+desc_uppercase_sentence = Kontrolas, \u0109u frazo komenci\u011das per majuskla litero
+
+desc_whitespacerepetition = Ripetita spaceto (ne\u011dusta formato)
+
+double_dots_short = Du sinsekvaj punktoj
+
+double_commas_short = Du sinsekvaj komoj
+
+en = Angla
+
+enterText = Bonvolu tajpi a\u016d alglui kontrolendan tekston en la supra kampo
+
+enterText2 = Bonvolu enmeti la kontrolendajn tekstojn \u0109i tie
+
+eo = Esperanto
+
+errorContext = Kunteksto:
+
+errorMessage = Mesa\u011do:
+
+es = Hispana
+
+false_friend = Falsaj amikoj
+
+false_friend_desc = false friend hint for:
+
+false_friend_hint = Konsilo: "{0}" ({1}) signifas {2} ({3}).
+
+false_friend_suggestion = \u0108u vi intencis {0}?
+
+fr = Franca
+
+gl = Galega
+
+guiCancelButton = Rezigni
+
+guiCheckComplete = Kontrolado de LingvoIlo fini\u011dis.
+
+guiConfigWindowTitle = Opcioj de LingvoIlo
+
+# Errors are on purpose in this demo text.
+guiDemoText = \u0108i tiu ekzemplo estas ekzemplo por por montri kiel funkcias LingvoIlo. Rimarku, tamen, ke \u011di ne inkluzivas litterumulon.
+
+guiMatchCount = Eblaj eraroj:
+
+guiMenuAbout = &Pri...
+
+guiMenuAddRules = \u015cargi dosieron de &reguloj
+
+guiMenuCheckClipboard = &Kontroli tekston en la tondujo
+
+guiMenuFile = &Dosiero
+
+guiMenuHelp = &Helpo
+
+guiMenuHide = K&a\u015di en la taskopleto
+
+guiMenuOpen = &Malfermi...
+
+guiMenuOptions = &Opcioj...
+
+guiMenuQuit = &Eliri
+
+guiMenuShowMainWindow = Malfermi la \u0109efan fenestron
+
+guiMotherTongue = Via denaska lingvo:
+
+guiNoErrorsFound = Neniuj eraroj a\u016d avertoj trovitaj (lingvo: {0})
+
+guiNoErrorsFoundSelectedText = Neniuj eraroj a\u016d avertoj trovitaj en la apartigita teksto (lingvo: {0})
+
+guiOKButton = &Bone
+
+guiOOoChangeButton = \u015ca&n\u011di
+
+guiOOoCloseButton = Fermi
+
+guiOOoIgnoreAllButton = Ignori \u0109iujn
+
+guiOOoIgnoreButton = Ignori
+
+guiOOoOptionsButton = Opcioj...
+
+guiProgressWindowTitle = LingvoIlo: kontrolado de teksto...
+
+guiReplaceWindowTitle = Anstata\u016digi tekston
+
+guiReplaceWithOtherText = <alia teksto>
+
+guiRunOnPort = Run as server on po&rt
+
+guiSelectionCheckComplete = Kontrolo per LingvoIlo de apartigita teksto fini\u011dis.
+
+incorrect_case = Tiu frazo ne komenci\u011das per majuskla litero
+
+is = Islanda
+
+it = Itala
+
+lt = Litova
+
+missing_space_after_comma = Enmetu spaceton post la komo
+
+ml= Malajala
+
+nl = Nederlanda
+
+no_space_after = Ne metu spaceton malanta\u016d malfermantaj krampoj
+
+no_space_before = Ne metu spaceton anta\u016d fermanta krampo
+
+no_space_before_dot = Ne enmetu spaceton anta\u016d punkto
+
+pl = Pola
+
+repetition = Ebla mistajpa\u0135o: vi ripetis vorton
+
+result1 = <br><b> {0}. Linio {1}, kolumno {2}</b><br>
+
+resultAreaText = Rezultoj aperos tie
+
+resultTime = <br>Tempo: {0}ms (inkluzive {1}ms por rekono de reguloj)<br>
+
+ru = Rusa
+
+sk = Slovaka
+
+sl = Slovena
+
+space_after_comma = Enmeti spaceton post la komo, sed ne anta\u016d la komo
+
+startChecking = Ekkontroli en {0}
+
+sv = Sveda
+
+textLanguage = Lingvo de teksto:
+
+two_commas = Du sinsekvaj komoj
+
+two_dots = Du sinsekvaj punktoj
+
+uk = Ukraina
+
+unpaired_brackets = Nekongruaj krampoj a\u016d similaj simboloj
+
+whitespace_repetition = Ebla mistajpa\u0135o: vi ripetis spaceton
+
+ro = Rumana
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_es.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_es.properties
new file mode 100644
index 0000000..e95e2b4
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_es.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Bielorruso
+ca=Catal\u00e1n
+category_case=May\u00fasculas y min\u00fasculas
+category_false_friend=Falsos amigos
+category_grammar=Gram\u00e1tica
+category_misc=Diversas
+category_typo=Posible error tipogr\u00e1fico
+checkDone=Revisi\u00f3n completa, {0} posibles problemas encontrados
+checkText=&Revisar Texto
+correctionMessage=Correcci\u00f3n\:
+cs=checo
+da=dan\u00e9s
+de=alem\u00e1n
+desc_comma_whitespace=Espacios en blanco antes de coma y antes/despu\u00e9s de par\u00e9ntesis
+desc_double_punct=Dos puntos o comas consecutivos
+desc_repetition=Repetici\u00f3n de una palabra (p. ej. 'soy soy')
+desc_repetition_short=Repetici\u00f3n de una palabra
+desc_unpaired_brackets=Par\u00e9ntesis, comillas, signos de exclamaci\u00f3n, interrogaci\u00f3n y similares desparejados
+desc_uppercase_sentence=Comprobar si la frase se inicia con una letra may\u00fascula
+desc_whitespacerepetition=M\u00faltiples espacios en blanco
+double_dots_short=Dos puntos consecutivos
+double_commas_short=Dos comas consecutivas
+en=ingl\u00e9s
+enterText=Escriba o pegue el texto a revisar en el cuadro superior
+enterText2=Escriba o pegue el texto a revisar aqu\u00ed
+errorContext=Contexto\:
+errorMessage=Mensaje\:
+es=espa\u00f1ol
+false_friend=Falso amigo
+false_friend_desc=Nota para falso amigo\:
+false_friend_hint=Nota\: "{0}" ({1}) significa {2} ({3}).
+false_friend_suggestion=\u00bfQuiere decir {0}?
+fr=franc\u00e9s
+gl=gallego
+guiCancelButton=Cancelar
+guiCheckComplete=Se ha completado la revisi\u00f3n de LanguageTool.
+guiConfigWindowTitle=LanguageTool - Opciones
+guiDemoText=Ese es un un ejemplo del texto que mostra como funcciona LanguageTool. Ese programma no incluieee ninguna revisi\u00f3n ortogr\u00e1fica\!
+guiMatchCount=Posibles errores\:
+guiMenuAbout=Acerca de...
+guiMenuAddRules=Cargar fichero de reglas
+guiMenuCheckClipboard=Revisar el texto del portapapeles
+guiMenuFile=Archivo
+guiMenuHelp=Ayuda
+guiMenuHide=Minimizar a la bandeja del sistema
+guiMenuOpen=Abrir...
+guiMenuOptions=Opciones...
+guiMenuQuit=Terminar
+guiMenuShowMainWindow=Abrir ventana principal
+guiMotherTongue=Idioma nativo\:
+guiNoErrorsFound=No se han encontrado errores ni advertencias (idioma\: {0})
+guiNoErrorsFoundSelectedText=No se han encontrado errores ni advertencias en el texto seleccionado (idioma\: {0})
+guiOKButton=Aceptar
+guiOOoChangeButton=Cambiar
+guiOOoCloseButton=Cerrar
+guiOOoIgnoreAllButton=Ignorar siempre
+guiOOoIgnoreButton=Ignorar
+guiOOoOptionsButton=Opciones...
+guiProgressWindowTitle=LanguageTool\: Comprobaci\u00f3n del texto...
+guiReplaceWindowTitle=Sustituir texto
+guiReplaceWithOtherText=<otro texto>
+guiRunOnPort=Ejecutar como servidor en puerto
+guiSelectionCheckComplete=Se ha completado la verificaci\u00f3n del texto seleccionado en LanguageTool.
+incorrect_case=Esa frase no se inicia con may\u00fascula
+is=island\u00e9s
+it=italiano
+lt=lituano
+missing_space_after_comma=Deja un espacio despu\u00e9s de coma
+ml=malayalam
+nl=holand\u00e9s
+no_space_after=No se deja un espacio despu\u00e9s de un par\u00e9ntesis izquierdo
+no_space_before=No se deja un espacio antes de un par\u00e9ntesis derecho
+no_space_before_dot=No se deja un espacio antes del punto
+pl=polaco
+repetition=Posible error tipogr\u00e1fico\: repetici\u00f3n de una palabra
+result1=<br><b> {0}. L\u00ednea {1}, Columna {2}</b><br>
+resultAreaText=Los resultados aparecer\u00e1n aqu\u00ed.
+resultTime=<br>Tiempo\: {0}ms (incluye {1}ms para la coincidencia de reglas)<br>
+ru=ruso
+sk=eslovaco
+sl=esloveno
+space_after_comma=Se deja un espacio despu\u00e9s de coma y nunca antes del signo ortogr\u00e1fico.
+startChecking=Inicio de verificaci\u00f3n en {0}
+sv=sueco
+textLanguage=Idioma del texto\:
+two_commas=Dos comas consecutivas
+two_dots=Dos puntos consecutivos
+uk=ucraniano
+unpaired_brackets=Se ha encontrado un error en los par\u00e9ntesis, comillas, signos de exclamaci\u00f3n o interrogaci\u00f3n
+whitespace_repetition=Posible error tipogr\u00e1fico\: m\u00faltiples espacios en blanco
+ro=rumano
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_fr.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_fr.properties
new file mode 100644
index 0000000..1befa1f
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_fr.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Bi\u00e9lorusse
+ca=catalan
+category_case=Majuscules
+category_false_friend=Faux amis
+category_grammar=Grammaire
+category_misc=R\u00e8gles de base
+category_typo=Faute de frappe possible
+checkDone=V\u00e9rification termin\u00e9e. Nombre d'alertes \: {0}
+checkText=V\u00e9rifier le texte
+correctionMessage=Correction \:
+cs=tch\u00e8que
+da=danois
+de=allemand
+desc_comma_whitespace=Espace devant \u00ab\u00a0,\u00a0\u00bb, \u00ab\u00a0)\u00a0\u00bb ou apr\u00e8s \u00ab\u00a0(\u00a0\u00bb
+desc_double_punct=Virgules ou points cons\u00e9cutifs
+desc_repetition=Doublon (\u00ab\u00a0pour pour\u00a0\u00bb, \u00ab\u00a0je je\u00a0\u00bb, etc.)
+desc_repetition_short=Doublon
+desc_unpaired_brackets=Guillemet fermant ou ouvrant manquant
+desc_uppercase_sentence=Majuscule en d\u00e9but de phrase
+desc_whitespacerepetition=Plusieurs espaces blanches (mauvais format)
+double_dots_short=Points cons\u00e9cutifs
+double_commas_short=Virgules cons\u00e9cutives
+en=anglais
+enterText=Ins\u00e9rez le texte \u00e0 v\u00e9rifier dans l'espace du haut
+enterText2=Placez le texte \u00e0 v\u00e9rifier ici
+errorContext=Contexte \:
+errorMessage=Message \:
+es=espagnol
+false_friend=Faux ami
+false_friend_desc=note concernant les faux-amis \:
+false_friend_hint=Note \: "{0}" ({1}) signifie {2} ({3}).
+false_friend_suggestion=Voulez-vous dire {0} ?
+fr=fran\u00e7ais
+gl=galicien
+guiCancelButton=Annuler
+guiCheckComplete=La v\u00e9rification de LanguageTool est termin\u00e9e.
+guiConfigWindowTitle=Options de LanguageTool
+guiDemoText=Se texte est un exemple pour pour vous montrer le fonctionnement de LanguageTool. notez que LanguageTool ne comporte pas de correcteur orthographique.
+guiMatchCount=Erreurs possibles \:
+guiMenuAbout=\u00c0 propos...
+guiMenuAddRules=Charger un fichier de r\u00e8gles
+guiMenuCheckClipboard=V\u00e9rifier le texte dans le presse-papiers
+guiMenuFile=Fichier
+guiMenuHelp=Aide
+guiMenuHide=R\u00e9duire dans la barre des t\u00e2ches
+guiMenuOpen=Ouvrir...
+guiMenuOptions=Options...
+guiMenuQuit=&Quitter
+guiMenuShowMainWindow=Ouvrir la fen\u00eatre principale
+guiMotherTongue=Langue maternelle \:
+guiNoErrorsFound=Ni alerte ni erreur n'ont \u00e9t\u00e9 trouv\u00e9es (langue \: {0})
+guiNoErrorsFoundSelectedText=Ni alerte ni erreur n'ont \u00e9t\u00e9 trouv\u00e9es pour le texte s\u00e9lectionn\u00e9 (langue \: {0})
+guiOKButton=&OK
+guiOOoChangeButton=Changer
+guiOOoCloseButton=Fermer
+guiOOoIgnoreAllButton=Ignorer tout
+guiOOoIgnoreButton=Ignorer
+guiOOoOptionsButton=Options...
+guiProgressWindowTitle=LanguageTool \: V\u00e9rification en cours...
+guiReplaceWindowTitle=Remplacer le texte
+guiReplaceWithOtherText=<autre texte>
+guiRunOnPort=Ex\u00e9cuter en tant que serveur sur le port
+guiSelectionCheckComplete=La v\u00e9rification de LanguageTool pour le texte s\u00e9lectionn\u00e9 est termin\u00e9e.
+incorrect_case=Cette phrase ne commence pas par une majuscule
+is=islandais
+it=italien
+lt=lituanien
+missing_space_after_comma=Cr\u00e9ez une espace apr\u00e8s la virgule
+ml=malayalam
+nl=n\u00e9erlandais
+no_space_after=Ne placez pas d'espace apr\u00e8s une parenth\u00e8se ouvrante
+no_space_before=Ne placez pas d'espace avant une parenth\u00e8se fermante
+no_space_before_dot=Ne placez pas d'espace avant le point
+pl=polonais
+repetition=Faute de frappe possible \: un mot est r\u00e9p\u00e9t\u00e9
+result1=<br><b> {0}. Ligne {1}, colonne {2}</b><br>
+resultAreaText=Les r\u00e9sultats se trouveront ici.
+resultTime=<br>Temps \: {0}ms (dont {1}ms pour le filtrage de r\u00e8gles)<br>
+ru=russe
+sk=slovaque
+sl=slov\u00e9nien
+space_after_comma=Placer l'espace apr\u00e8s la virgule et non avant
+startChecking=Commencement de la v\u00e9rification en {0}
+sv=su\u00e9dois
+textLanguage=Langue du texte \:
+two_commas=Deux virgules cons\u00e9cutives
+two_dots=Deux points cons\u00e9cutifs
+uk=ukrainien
+unpaired_brackets=Il manque une parenth\u00e8se fermante ou ouvrante, un guillemet fermant ou ouvrant, etc.
+whitespace_repetition=Faute de frappe possible \: vous avez r\u00e9p\u00e9t\u00e9 une espace
+ro=roumain
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_gl.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_gl.properties
new file mode 100644
index 0000000..c5a7489
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_gl.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Bielorruso
+ca=catal\u00e1n
+category_case=Mai\u00fasculas e min\u00fasculas
+category_false_friend=Falsos amigos
+category_grammar=Gram\u00e1tica
+category_misc=Miscel\u00e1nea
+category_typo=Pos\u00edbeis erros tipogr\u00e1ficos
+checkDone=Comprobaci\u00f3n feita, atop\u00e1ronse {0} problemas potenciais
+checkText=&Comprobar o texto
+correctionMessage=Correcci\u00f3n\:
+cs=checo
+da=dan\u00e9s
+de=alem\u00e1n
+desc_comma_whitespace=Uso de espazos en branco diante dunha coma ou antes/despois de par\u00e9ntese
+desc_double_punct=Uso de dous puntos ou comas consecutivos
+desc_repetition=Repetici\u00f3n dunha palabra (por exemplo, 'vai vai')
+desc_repetition_short=Repetici\u00f3n dunha palabra
+desc_unpaired_brackets=Par\u00e9nteses, comi\u00f1as e s\u00edmbolos similares desemparellados
+desc_uppercase_sentence=Comproba que unha oraci\u00f3n comece con mai\u00fascula
+desc_whitespacerepetition=M\u00faltiples espazos en branco (erro de formato)
+double_dots_short=Dous puntos consecutivos
+double_commas_short=D\u00faas comas consecutivas
+en=ingl\u00e9s
+enterText=Por favor, teclee ou pegue o texto a corrixir na \u00e1rea superior
+enterText2=Por favor, insira aqu\u00ed o texto a corrixir
+errorContext=Contexto\:
+errorMessage=Mensaxe\:
+es=espa\u00f1ol
+false_friend=Falso amigo
+false_friend_desc=Nota referente aos falsos amigos\:
+false_friend_hint=Consello\: "{0}" ({1}) significa {2} ({3}).
+false_friend_suggestion=Quer\u00eda vostede dicir {0}?
+fr=franc\u00e9s
+gl=galego
+guiCancelButton=Cancelar
+guiCheckComplete=Completouse a correcci\u00f3n de LanguageTool.
+guiConfigWindowTitle=Opci\u00f3ns de LanguageTool
+guiDemoText=Esta vai a ser unha mostra de de exemplo para amosar o funcionamento de LanguageTool. Por\u00e9n, te\u00f1a en conta que non incl\u00fae un corretor de ortografia.
+guiMatchCount=Erros potenciais\:
+guiMenuAbout=&Acerca de...
+guiMenuAddRules=Cargar ficheiro de &regras
+guiMenuCheckClipboard=&Corrixir texto do portaretallos
+guiMenuFile=&Ficheiro
+guiMenuHelp=A&xuda
+guiMenuHide=&Minimizar \u00e1 bandexa do sistema
+guiMenuOpen=A&brir...
+guiMenuOptions=Opci\u00f3n&s...
+guiMenuQuit=Sa\u00ed&r
+guiMenuShowMainWindow=Abrir fiestra principal
+guiMotherTongue=A s\u00faa lingua materna\:
+guiNoErrorsFound=Non se atoparon erros nin advertencias (lingua\: {0})
+guiNoErrorsFoundSelectedText=Non se atoparon erros ou advertencias no texto seleccionado (lingua\: {0})
+guiOKButton=&Aceptar
+guiOOoChangeButton=&Cambiar
+guiOOoCloseButton=Pechar
+guiOOoIgnoreAllButton=Ignorar todos
+guiOOoIgnoreButton=Ignorar
+guiOOoOptionsButton=Opci\u00f3ns...
+guiProgressWindowTitle=LanguageTool\: Comprobaci\u00f3n do texto...
+guiReplaceWindowTitle=Substitu\u00edr o texto
+guiReplaceWithOtherText=<outro texto>
+guiRunOnPort=Executar como servidor no por&to
+guiSelectionCheckComplete=LanguageTool completou a correcci\u00f3n do texto seleccionado.
+incorrect_case=Esta oraci\u00f3n non comeza cunha letra mai\u00fascula
+is=island\u00e9s
+it=italiano
+lt=lituano
+missing_space_after_comma=Po\u00f1a un espazo detr\u00e1s da coma
+ml=malaiala
+nl=holand\u00e9s
+no_space_after=Non debe usar espazos detr\u00e1s dos par\u00e9nteses de apertura
+no_space_before=Non debe usar espazos antes dos par\u00e9nteses de peche
+no_space_before_dot=Non debe po\u00f1er espazos antes dun punto.
+pl=polaco
+repetition=Pos\u00edbel erro tipogr\u00e1fico\: repet\u00edu unha palabra
+result1=<br><b> {0}. Li\u00f1a {1}, columna {2}</b><br>
+resultAreaText=Os resultados aparecer\u00e1n aqu\u00ed
+resultTime=<br>Tempo\: {0}ms (isto incl\u00fae {1}ms de coincidencia de regras)<br>
+ru=ruso
+sk=eslovaco
+sl=esloveno
+space_after_comma=Po\u00f1a un espazo en branco despois da coma, pero nunca antes
+startChecking=Inicio da verificaci\u00f3n en {0}
+sv=sueco
+textLanguage=Lingua do texto\:
+two_commas=D\u00faas comas consecutivas
+two_dots=Dous puntos consecutivos
+uk=ucra\u00edno
+unpaired_brackets=Par\u00e9nteses ou s\u00edmbolos similares desemparellados
+whitespace_repetition=Pos\u00edbel erro tipogr\u00e1fico\: repet\u00edu un espazo en branco
+ro=roman\u00e9s
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_is.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_is.properties
new file mode 100644
index 0000000..488aea9
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_is.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Belarusian
+ca=Catalan
+category_case=H\u00e1stafur/l\u00e1gstafur
+category_false_friend=Falskir vinir
+category_grammar=M\u00e1lfr\u00e6\u00f0i
+category_misc=\u00ddmislegt
+category_typo=Hugsanleg ritvilla
+checkDone=Yfirlestri loki\u00f0, {0} hugsanlegar villur fundust
+checkText=&Yfirlestur
+correctionMessage=Lei\u00f0r\u00e9tting\:
+cs=t\u00e9kkneska
+da=Danish
+de=\u00fe\u00fdska
+desc_comma_whitespace=Bil \u00e1 undan kommu og \u00e1 undan/eftir sviga
+desc_double_punct=Tv\u00edtekinn punktur e\u00f0a komma
+desc_repetition=Endurteki\u00f0 or\u00f0 (t.d. 'mun mun')
+desc_repetition_short=Endurteki\u00f0 or\u00f0
+desc_unpaired_brackets=Svigar, hornklofar e\u00f0a \u00f6nnur greinarmerki standast ekki \u00e1
+desc_uppercase_sentence=Athuga hvort setning hefst \u00e1 st\u00f3rum staf
+desc_whitespacerepetition=Tv\u00f6falt bil (galli \u00e1 uppsetningu)
+double_dots_short=Tveir punktar \u00ed r\u00f6\u00f0
+double_commas_short=Tv\u00e6r kommur \u00ed r\u00f6\u00f0
+en=enska
+enterText=Skrifa\u00f0u e\u00f0a l\u00edmdu texta til a\u00f0 lesa yfir \u00ed efsta reitinn
+enterText2=Settu texta inn h\u00e9r
+errorContext=Samhengi\:
+errorMessage=Skilabo\u00f0\:
+es=sp\u00e6nska
+false_friend=Falskur vinur
+false_friend_desc=\u00c1bending um falskan vin\:
+false_friend_hint=Athuga\u00f0u\: "{0}" ({1}) merkir {2} ({3}).
+false_friend_suggestion=\u00c1ttir\u00f0u vi\u00f0 {0}?
+fr=franska
+gl=Galician
+guiCancelButton=H\u00e6tta vi\u00f0
+guiCheckComplete=LanguageTool yfirlestri er loki\u00f0.
+guiConfigWindowTitle=LanguageTool valkostir
+guiDemoText=\u00deetta er d\u00e6mi um texta sem \u00e1 a\u00f0 s\u00edna farm \u00e1 hvernig LanguageTool virkar. \u00dea\u00f0 er \u00fe\u00f3 h\u00e9rme\u00f0 gert lj\u00f3st a\u00f0 forriti\u00f0 framkv\u00e6mir ekki hef\u00f0bundna ritvilluleit.
+guiMatchCount=Hugsanlegar villur\:
+guiMenuAbout=&Um...
+guiMenuAddRules=Hla\u00f0a inn &reglum
+guiMenuCheckClipboard=&Yfirlesa texta \u00e1 klemmuspjaldi
+guiMenuFile=&Skr\u00e1
+guiMenuHelp=&Hj\u00e1lp
+guiMenuHide=&Fela \u00e1 t\u00e6kjasl\u00e1
+guiMenuOpen=&Opna...
+guiMenuOptions=&Valkostir...
+guiMenuQuit=&Loka
+guiMenuShowMainWindow=Opna a\u00f0alglugga
+guiMotherTongue=\u00deitt m\u00f3\u00f0urm\u00e1l\:
+guiNoErrorsFound=Engar villur fundust (tungum\u00e1l\: {0})
+guiNoErrorsFoundSelectedText=Engar villur fundust \u00ed v\u00f6ldum texta (tungum\u00e1l\: {0})
+guiOKButton=&\u00cd lagi
+guiOOoChangeButton=&Breyta
+guiOOoCloseButton=Loka
+guiOOoIgnoreAllButton=Sleppa \u00f6llu
+guiOOoIgnoreButton=Sleppa
+guiOOoOptionsButton=Valkostir...
+guiProgressWindowTitle=LanguageTool\: Les yfir texta...
+guiReplaceWindowTitle=Skipta \u00fat texta
+guiReplaceWithOtherText=<annar texti>
+guiRunOnPort=Keyra \u00fej\u00f3n \u00e1 netg\u00e1tt nr.
+guiSelectionCheckComplete=LanguageTool hefur loki\u00f0 yfirlestri.
+incorrect_case=\u00deessi setning hefst ekki \u00e1 h\u00e1staf
+is=Icelandic
+it=\u00edtalska
+lt=lith\u00e1\u00edska
+missing_space_after_comma=Bil vantar \u00e1 eftir kommu
+ml=Malayalam
+nl=hollenska
+no_space_after=Ekki setja bil eftir a\u00f0 svigi er opna\u00f0ur
+no_space_before=Ekki setja bil \u00e1\u00f0ur en sviga er loka\u00f0
+no_space_before_dot=Ekki setja bil \u00e1 undan punkti
+pl=p\u00f3lska
+repetition=Hugsanleg ritvilla\: or\u00f0 endurteki\u00f0
+result1=<br><b> {0}. L\u00edna {1}, d\u00e1lkur {2}</b><br>
+resultAreaText=Ni\u00f0urst\u00f6\u00f0ur birtast h\u00e9r
+resultTime=<br>T\u00edmi\: {0}ms (\u00fear af {1}ms til a\u00f0 m\u00e1ta reglur)<br>
+ru=r\u00fassneska
+sk=Slovak
+sl=sl\u00f3venska
+space_after_comma=Bil skal vera \u00e1 eftir kommu, ekki \u00e1 undan henni
+startChecking=Hef yfirlestur\: {0}
+sv=s\u00e6nska
+textLanguage=Tungum\u00e1l texta\:
+two_commas=Tv\u00e6r kommur \u00ed r\u00f6\u00f0
+two_dots=Tveir punktar \u00ed r\u00f6\u00f0
+uk=\u00fakra\u00ednska
+unpaired_brackets=Svigar (e\u00f0a svipu\u00f0 t\u00e1kn) standast ekki \u00e1
+whitespace_repetition=Hugsanleg ritvilla\: endurteki\u00f0 bil
+ro=Romanian
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_it.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_it.properties
new file mode 100644
index 0000000..ddb2e9e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_it.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Bielorusso
+ca=Catalano
+category_case=Uso delle maiuscole
+category_false_friend=Falsi amici
+category_grammar=Grammatica
+category_misc=Altre
+category_typo=Possibile errore di battitura
+checkDone=Controllo completato, sono stati trovati {0} possibili errori
+checkText=&Controlla Testo
+correctionMessage=Correzione\:
+cs=Ceco
+da=Danese
+de=Tedesco
+desc_comma_whitespace=Utilizzo dello spazio prima della virgola e prima/dopo le parentesi
+desc_double_punct=Doppia battitura di punti o di virgole
+desc_repetition=Parola ripetuta (es. 'casa casa')
+desc_repetition_short=Ripetizione
+desc_unpaired_brackets=Non chiusura di parentesi, virgolette e altra punteggiatura simile
+desc_uppercase_sentence=Controlla che la frase inizi con una maiuscola
+desc_whitespacerepetition=Ripetizione dello spazio (brutta formattazione)
+double_dots_short=Due punti consecutivi
+double_commas_short=Due virgole consecutive
+en=Inglese
+enterText=Si prega di scrivere o di incollare il testo da controllare nel campo in alto
+enterText2=Si prega di inserire il testo da controllare qui
+errorContext=Contesto\:
+errorMessage=Messaggio\:
+es=Spagnolo
+false_friend=Falso amico
+false_friend_desc=suggerimento di falso amico per\:
+false_friend_hint=Suggerimento\: "{0}" ({1}) significa {2} ({3}).
+false_friend_suggestion=Intendevi forse {0}?
+fr=Francese
+gl=Galiziano
+guiCancelButton=Annulla
+guiCheckComplete=LanguageTool ha completato il controllo.
+guiConfigWindowTitle=Opzioni LanguageTool
+guiDemoText=Questo \u00e8 un esempio di input per dimostrare il funzionamento di LanguageTool. Si noti, per\u00f2, che non include il controllo ortografico.
+guiMatchCount=Probabili errori\:
+guiMenuAbout=&Informazioni su...
+guiMenuAddRules=Carica file delle &Regole
+guiMenuCheckClipboard=&Controlla il testo nella Clipboard
+guiMenuFile=&File
+guiMenuHelp=&Aiuto
+guiMenuHide=&Minimizza nel System Tray
+guiMenuOpen=&Apri...
+guiMenuOptions=Opzio&ni...
+guiMenuQuit=&Esci
+guiMenuShowMainWindow=Apri la Finestra Principale
+guiMotherTongue=La tua lingua madre\:
+guiNoErrorsFound=Niente da segnalare (linguaggio\: {0})
+guiNoErrorsFoundSelectedText=Niente da segnalare nel testo selezionato (linguaggio\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=&Cambia
+guiOOoCloseButton=Chiudi
+guiOOoIgnoreAllButton=Ignora Tutti
+guiOOoIgnoreButton=Ignora
+guiOOoOptionsButton=Opzioni...
+guiProgressWindowTitle=LanguageTool\: Controllo in corso...
+guiReplaceWindowTitle=Rimpiazza il testo
+guiReplaceWithOtherText=<altro testo>
+guiRunOnPort=Esegui come server sulla po&rta
+guiSelectionCheckComplete=Il controllo LanguageTool del testo selezionato \u00e8 completo.
+incorrect_case=Questa frase non inizia con una maiuscola
+is=Islandese
+it=Italiano
+lt=Lituano
+missing_space_after_comma=Inserire uno spazio dopo la virgola
+ml=Malayalam
+nl=Olandese
+no_space_after=Non inserire lo spazio dopo l'apertura di parentesi
+no_space_before=Non inserire lo spazio prima della chiusura di parentesi
+no_space_before_dot=Non inserire lo spazio prima del punto
+pl=Polacco
+repetition=Possibile errore di battitura\: parola ripetuta
+result1=<br><b> {0}. Linea {1}, colonna {2}</b><br>
+resultAreaText=I risultati appariranno qui
+resultTime=<br>Tempo\: {0}ms (inclusi {1}ms per il rule matching)<br>
+ru=Russo
+sk=Slovacco
+sl=Sloveno
+space_after_comma=Inserire lo spazio dopo la virgola e non prima
+startChecking=Inizio controllo a {0}
+sv=Svedese
+textLanguage=Linguaggio del testo\:
+two_commas=Due virgole consecutive
+two_dots=Due punti consecutivi
+uk=Ucraino
+unpaired_brackets=Non chiusura di parentesi o di simboli simili
+whitespace_repetition=Probabile errore\: ripetizione di spazio
+ro=Rumeno
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_lt.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_lt.properties
new file mode 100644
index 0000000..1d3ea9d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_lt.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Baltarusi\u0173
+ca=Katalon\u0173
+category_case=Did\u017eiosios/ma\u017eosios raid\u0117s
+category_false_friend=Netikras draugas
+category_grammar=Gramatika
+category_misc=Kiti
+category_typo=Possible Typo
+checkDone=Tikrinimas baigtas, rasta galim\u0173 klaid\u0173\: {0}
+checkText=Tikrinti tekst\u0105
+correctionMessage=Correction\:
+cs=\u010dek\u0173
+da=Dan\u0173
+de=vokie\u010di\u0173
+desc_comma_whitespace=Tarp\u0173 naudojimas prie\u0161/po kablel\u012f bei skliaustelius
+desc_double_punct=Ar n\u0117ra dviej\u0173 pasikartojan\u010di\u0173 ta\u0161k\u0173 ar kableli\u0173
+desc_repetition=Pasikartojantys \u017eod\u017eiai (pvz. "ar ar")
+desc_repetition_short=Word repetition
+desc_unpaired_brackets=Unpaired braces, brackets, quotation marks and similar symbols
+desc_uppercase_sentence=Tikrinti ar sakinys prasideda i\u0161 did\u017eiosios raid\u0117s
+desc_whitespacerepetition=Whitespace repetition (bad formatting)
+double_dots_short=Two consecutive dots
+double_commas_short=Two consecutive commas
+en=angl\u0173
+enterText=Tikrinimui skirt\u0105 tekst\u0105 ra\u0161ykite arba \u012fd\u0117kite vir\u0161uje
+enterText2=\u010cia para\u0161ykite arba \u012fd\u0117kite tikrinimui skirt\u0105 tekst\u0105
+errorContext=Tekstas\:
+errorMessage=Klaida\:
+es=ispan\u0173
+false_friend=False friend
+false_friend_desc=netikras draugas\:
+false_friend_hint=Hint\: "{0}" ({1}) means {2} ({3}).
+false_friend_suggestion=Did you mean {0}?
+fr=pranc\u016bz\u0173
+gl=Galician
+guiCancelButton=At\u0161aukti
+guiCheckComplete=Gramatikos tikrinimas baigtas.
+guiConfigWindowTitle=Gramatikos \u012frankio nustatymai
+guiDemoText=\u010dia yra pavyzdinis tekstas gramatikos tikrinimui tikrinimui. Atsiminkite , kad \u017eod\u017ei\u0173 ra\u0161yba netikrynama.
+guiMatchCount=Rasta galim\u0173 klaid\u0173\:
+guiMenuAbout=Apie...
+guiMenuAddRules=Load Rule File
+guiMenuCheckClipboard=Tikrinti nukopijuot\u0105 tekst\u0105 i\u0161karpin\u0117je
+guiMenuFile=Failas
+guiMenuHelp=Pagalba
+guiMenuHide=Pasl\u0117pti sistemos prane\u0161im\u0173 vietoje
+guiMenuOpen=Atverti...
+guiMenuOptions=Nustatymai...
+guiMenuQuit=I\u0161eiti
+guiMenuShowMainWindow=Open main window
+guiMotherTongue=Gimtoji kalba\:
+guiNoErrorsFound=Klaid\u0173 nerasta (teksto kalba\: {0})
+guiNoErrorsFoundSelectedText=Pa\u017eym\u0117tame tekste klaid\u0173 nerasta (teksto kalba\: {0})
+guiOKButton=Gerai
+guiOOoChangeButton=Pakeisti
+guiOOoCloseButton=U\u017edaryti
+guiOOoIgnoreAllButton=Nepaisyti visur
+guiOOoIgnoreButton=Nepaisyti
+guiOOoOptionsButton=Nustatymai...
+guiProgressWindowTitle=Gramatika\: Tikrinamas tekstas...
+guiReplaceWindowTitle=Pakeisti tekst\u0105
+guiReplaceWithOtherText=<kitas tekstas>
+guiRunOnPort=Paleisti tikrinimo server\u012f, prievadas\:
+guiSelectionCheckComplete=Pa\u017eym\u0117to teksto gramatikos tikrinimas baigtas.
+incorrect_case=Sakinys turi prasid\u0117ti i\u0161 did\u017eiosios raid\u0117s
+is=island\u0173
+it=ital\u0173
+lt=lietuvi\u0173
+missing_space_after_comma=Po kablelio reikia pad\u0117ti tarp\u0105
+ml=Malajalam\u0173
+nl=oland\u0173
+no_space_after=Nereikia d\u0117ti tarpo po atidaran\u010dio skliaustelio
+no_space_before=Nereikia d\u0117ti tarpo prie\u0161 u\u017edarant\u012f skliaustel\u012f
+no_space_before_dot=Don't put a space before the full stop
+pl=lenk\u0173
+repetition=Possible typo\: you repeated a word
+result1=<br><b> {0}. Eilut\u0117 {1}, simbolis {2}</b><br>
+resultAreaText=Rezultatai bus pateikti \u010dia
+resultTime=<br>Laikas\: {0}ms (\u012fskaitant {1}ms taisykli\u0173 tikrinimui)<br>
+ru=rus\u0173
+sk=slovak\u0173
+sl=Slovenian
+space_after_comma=Tarp\u0105 reikia d\u0117ti po kablelio, o ne prie\u0161 j\u012f
+startChecking=Pradedamas tikrinimas kalbai\: {0}
+sv=\u0161ved\u0173
+textLanguage=Teksto kalba\:
+two_commas=Du pasikartojantys kableliai
+two_dots=Du pasikartojantys ta\u0161kai
+uk=ukrainie\u010di\u0173
+unpaired_brackets=Unpaired bracket or similar symbol
+whitespace_repetition=Possible typo\: you repeated a whitespace
+ro=Rom\u00e2n\u0103
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nb.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nb.properties
new file mode 100644
index 0000000..520908b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nb.properties
@@ -0,0 +1,188 @@
+# Norwegian Bokmål translation of LanguageTool
+# Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de), Marcin Milkowski
+
+be = Hviterrussisk
+
+ca = Katalansk
+
+category_case = Capitalization
+
+category_false_friend = False friends
+
+category_grammar = Grammatikk
+
+category_misc = Diverse
+
+category_typo = Mulig slagfeil
+
+checkDone = Kontrollert ferdig, fant {0} mulige problemer
+
+checkText = &Kontroller tekst
+
+correctionMessage = Rettelse:
+
+cs = Tsjekkisk
+
+da = Dansk
+
+de = German
+
+nb = Norsk Bokm\u00e5l
+
+desc_comma_whitespace = Use of whitespace before comma and before/after parentheses
+
+desc_double_punct = Use of two consecutive dots or commas
+
+desc_repetition = Repetert ord (f.eks. 'skal skal')
+
+desc_repetition_short = Repetert ord
+
+desc_unpaired_brackets = Unpaired braces, brackets, quotation marks and similar symbols
+
+desc_uppercase_sentence = Checks that a sentence starts with an uppercase letter
+
+desc_whitespacerepetition = Whitespace repetition (bad formatting)
+
+double_dots_short = Two consecutive dots
+
+double_commas_short = Two consecutive commas
+
+en = Engelsk
+
+enterText = Please type or paste text to check in the top area
+
+enterText2 = Please insert text to check here
+
+eo = Esperanto
+
+errorContext = Sammenheng:
+
+errorMessage = Melding:
+
+es = Spansk
+
+false_friend = False friend
+
+false_friend_desc = false friend hint for:
+
+false_friend_hint = Vink: "{0}" ({1}) betyr {2} ({3}).
+
+false_friend_suggestion = Mente du {0}?
+
+fr = Fransk
+
+gl = Galician
+
+guiCancelButton = Avbryt
+
+guiCheckComplete = LanguageTool check is complete.
+
+guiConfigWindowTitle = LanguageTool Options
+
+guiDemoText = This is a example input to to show you how LanguageTool works. Note, however, that it does not include a spell checka.
+
+guiMatchCount = Potential errors:
+
+guiMenuAbout = &About...
+
+guiMenuAddRules = Load &Rule File
+
+guiMenuCheckClipboard = &Check Text in Clipboard
+
+guiMenuFile = &File
+
+guiMenuHelp = &Help
+
+guiMenuHide = &Hide to System Tray
+
+guiMenuOpen = &Open...
+
+guiMenuOptions = Option&s...
+
+guiMenuQuit = &Quit
+
+guiMenuShowMainWindow = Open Main Window
+
+guiMotherTongue = Your mother tongue:
+
+guiNoErrorsFound = No errors or warnings found (language: {0})
+
+guiNoErrorsFoundSelectedText = No errors or warnings found in selected text (language: {0})
+
+guiOKButton = &OK
+
+guiOOoChangeButton = &Change
+
+guiOOoCloseButton = Close
+
+guiOOoIgnoreAllButton = Ignore All
+
+guiOOoIgnoreButton = Ignore
+
+guiOOoOptionsButton = Options...
+
+guiProgressWindowTitle = LanguageTool: Checking Text...
+
+guiReplaceWindowTitle = Replace text
+
+guiReplaceWithOtherText = <other text>
+
+guiRunOnPort = Run as server on po&rt
+
+guiSelectionCheckComplete = LanguageTool check of selected text is complete.
+
+incorrect_case = This sentence does not start with an uppercase letter
+
+is = Icelandic
+
+it = Italian
+
+lt = Lithuanian
+
+missing_space_after_comma = Put a space after the comma
+
+ml= Malayalam
+
+nl = Dutch
+
+no_space_after = Don't put a space after the opening parenthesis
+
+no_space_before = Don't put a space before the closing parenthesis
+
+no_space_before_dot = Don't put a space before the full stop
+
+pl = Polish
+
+repetition = Possible typo: you repeated a word
+
+result1 = <br><b> {0}. Line {1}, column {2}</b><br>
+
+resultAreaText = Results will appear here
+
+resultTime = <br>Time: {0}ms (including {1}ms for rule matching)<br>
+
+ru = Russian
+
+sk = Slovak
+
+sl = Slovenian
+
+space_after_comma = Put a space after the comma, but not before the comma
+
+startChecking = Starting check in {0}
+
+sv = Swedish
+
+textLanguage = Text Language:
+
+two_commas = Two consecutive commas
+
+two_dots = Two consecutive dots
+
+uk = Ukrainian
+
+unpaired_brackets = Unpaired bracket or similar symbol
+
+whitespace_repetition = Possible typo: you repeated a whitespace
+
+ro = Romanian
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nl.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nl.properties
new file mode 100644
index 0000000..0825e8b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_nl.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Witrussisch
+ca=Catalaans
+category_case=Hoofdlettergebruik
+category_false_friend=Begripsverwarring
+category_grammar=Grammatica
+category_misc=Diversen
+category_typo=Mogelijke typefouten
+checkDone=Klaar, {0} aandachtspunten
+checkText=Tekst controleren
+correctionMessage=Suggestie\:
+cs=Tsjechisch
+da=Deens
+de=Duits
+desc_comma_whitespace=Spatie voor of achter haakje
+desc_double_punct=Twee komma's of punten
+desc_repetition=Herhaling (bijv. 'de de')
+desc_repetition_short=Herhaling
+desc_unpaired_brackets=Onjuist gecombineerde leestekens
+desc_uppercase_sentence=Controleert of een zin begint met een hoofdletter
+desc_whitespacerepetition=Teveel witruimte tussen woorden
+double_dots_short=Teveel punten
+double_commas_short=Teveel komma's
+en=Engels
+enterText=Voer in het bovenste vak de te controleren tekst in
+enterText2=Voer hier de te controleren tekst in
+errorContext=Context\:
+errorMessage=Melding\:
+es=Spaans
+false_friend=Betekenisverwarring
+false_friend_desc=Hint bij mogelijke verwisseling van begrippen\:
+false_friend_hint=Hint\: "{0}" ({1}) betekent {2} ({3}).
+false_friend_suggestion=Bedoelde u {0}?
+fr=Frans
+gl=Galicisch
+guiCancelButton=Annuleren
+guiCheckComplete=LanguageTool-controle gereed.
+guiConfigWindowTitle=LanguageTool-opties
+guiDemoText=Dit is een voorbeeld om te laten zien hoe Languagetool werkt. Besef wel dat het geen spellingcontrole bevat.
+guiMatchCount=Aandachtspunten\:
+guiMenuAbout=Over...
+guiMenuAddRules=Inlezen regelbestand
+guiMenuCheckClipboard=Tekst van klembord controleren
+guiMenuFile=Bestand
+guiMenuHelp=Hulp
+guiMenuHide=Naar systeemvak
+guiMenuOpen=Openen...
+guiMenuOptions=Opties...
+guiMenuQuit=Afsluiten
+guiMenuShowMainWindow=Open het hoofdvenster
+guiMotherTongue=Moedertaal\:
+guiNoErrorsFound=Geen aandachtspunten gevonden (taal\: {0})
+guiNoErrorsFoundSelectedText=Geen aandachtspunten in de geselecteerde tekst (taal\: {0})
+guiOKButton=&Ok\u00e9
+guiOOoChangeButton=Wijzigen
+guiOOoCloseButton=Sluiten
+guiOOoIgnoreAllButton=Alles negeren
+guiOOoIgnoreButton=Negeer
+guiOOoOptionsButton=Opties...
+guiProgressWindowTitle=LanguageTool\: Tekst controleren...
+guiReplaceWindowTitle=Tekst vervangen
+guiReplaceWithOtherText=<nieuwe tekst>
+guiRunOnPort=Voer uit als server op poort
+guiSelectionCheckComplete=LanguageTool\: controle van geselecteerde tekst gereed.
+incorrect_case=Deze zin begint niet met een hoofdletter
+is=IJslands
+it=Italiaans
+lt=Litouws
+missing_space_after_comma=Zet een spatie na de komma
+ml=Malayalam
+nl=Nederlands
+no_space_after=Zet geen spatie na een haakje openen
+no_space_before=Zet geen spatie voor een haakje sluiten
+no_space_before_dot=Zet geen spatie voor een punt
+pl=Pools
+repetition=Mogelijke typefout\: herhaling van woord
+result1=<br><b> {0}. Regel {1}, positie {2}</b><br>
+resultAreaText=Resultaten verschijnen hier. Suggesties voor (verbeteringen van) Nederlandse regels kunt u inbrengen via\: www.opentaal.org en opentaal@lists.sf.own-it.nl.
+resultTime=<br>Duur\: {0}ms (inclusief {1}ms voor de regels)<br>
+ru=Russisch
+sk=Slovaaks
+sl=Sloveens
+space_after_comma=Zet een spatie na een komma, maar niet ervoor
+startChecking=Start controle in {0}
+sv=Zweeds
+textLanguage=Teksttaal\:
+two_commas=Twee opeenvolgende komma's
+two_dots=Twee opeenvolgende punten
+uk=Oekra\u00efens
+unpaired_brackets=Oneven aantal teksthaken
+whitespace_repetition=Teveel witruimte
+ro=Roemeens
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_pl.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_pl.properties
new file mode 100644
index 0000000..ca00fb5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_pl.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=esperanto
+be=bia\u0142oruski
+ca=katalo\u0144ski
+category_case=Pisownia ma\u0142\u0105 i wielk\u0105 liter\u0105
+category_false_friend=Fa\u0142szywi przyjaciele
+category_grammar=Gramatyka
+category_misc=B\u0142\u0119dy r\u00f3\u017cne
+category_typo=Prawdopodobna liter\u00f3wka
+checkDone=Sprawdzono, liczba znalezionych potencjalnych problem\u00f3w\: {0}
+checkText=&Sprawd\u017a tekst
+correctionMessage=Proponowana poprawka\:
+cs=Czeski
+da=Du\u0144ski
+de=Niemiecki
+desc_comma_whitespace=Odst\u0119py przed przecinkami oraz przed nawiasami i po nawiasach
+desc_double_punct=Podw\u00f3jne kropki lub przecinki
+desc_repetition=Powt\u00f3rzenie wyrazu (np. \u201ejest jest\u201d)
+desc_repetition_short=Powt\u00f3rzenie wyrazu
+desc_unpaired_brackets=Niesparowane nawiasy, cudzys\u0142owy i podobne
+desc_uppercase_sentence=Test, czy zdanie zaczyna si\u0119 wielk\u0105 liter\u0105
+desc_whitespacerepetition=Powt\u00f3rzenie spacji (b\u0142\u0119dne formatowanie)
+double_dots_short=Dwie kropki
+double_commas_short=Dwa przecinki
+en=Angielski
+enterText=Wpisz lub wklej tekst do sprawdzenia w g\u00f3rnym polu
+enterText2=Wstaw tutaj tekst do sprawdzenia
+errorContext=Kontekst\:
+errorMessage=Komunikat\:
+es=Hiszpa\u0144ski
+false_friend=Fa\u0142szywi przyjaciele
+false_friend_desc=wskaz\u00f3wka dotycz\u0105ca fa\u0142szywych przyjaci\u00f3\u0142\:
+false_friend_hint=Wskaz\u00f3wka\: "{0}" ({1}) oznacza {2} ({3})
+false_friend_suggestion=Czy chodzi o {0}?
+fr=francuski
+gl=galisyjski
+guiCancelButton=Anuluj
+guiCheckComplete=Program LanguageTool zako\u0144czy\u0142 sprawdzanie.
+guiConfigWindowTitle=Opcje LanguageTool
+guiDemoText=To jest przyk\u0142adowy tekst kt\u00f3ry pokazuje, jak jak dzia\u0142a LanguageTool. LanguageTool nie zawiera jadnak korektora psowni.
+guiMatchCount=Potencjalne b\u0142\u0119dy\:
+guiMenuAbout=&Informacje...
+guiMenuAddRules=Otw\u00f3rz plik &regu\u0142
+guiMenuCheckClipboard=Sprawd\u017a &tekst ze schowka
+guiMenuFile=&Plik
+guiMenuHelp=Pomo&c
+guiMenuHide=Schowaj do paska &zada\u0144
+guiMenuOpen=&Otw\u00f3rz...
+guiMenuOptions=&Opcje...
+guiMenuQuit=Za&ko\u0144cz
+guiMenuShowMainWindow=Otw\u00f3rz g\u0142\u00f3wne okno
+guiMotherTongue=J\u0119zyk ojczysty\:
+guiNoErrorsFound=Nie znaleziono b\u0142\u0119d\u00f3w ani usterek (j\u0119zyk\: {0})
+guiNoErrorsFoundSelectedText=W zaznaczonym tek\u015bcie nie znaleziono b\u0142\u0119d\u00f3w ani usterek (j\u0119zyk\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=&Zmie\u0144
+guiOOoCloseButton=Zamknij
+guiOOoIgnoreAllButton=Ignoruj wszystkie
+guiOOoIgnoreButton=Ignoruj
+guiOOoOptionsButton=&Opcje...
+guiProgressWindowTitle=LanguageTool\: Sprawdzanie tekstu...
+guiReplaceWindowTitle=Zast\u0105p tekst
+guiReplaceWithOtherText=<inny tekst>
+guiRunOnPort=Uruchom jako serwer na po&rcie
+guiSelectionCheckComplete=Sprawdzanie zaznaczonego tekstu w programie LanguageTool zosta\u0142o zako\u0144czone.
+incorrect_case=To zdanie nie zaczyna si\u0119 wielk\u0105 liter\u0105
+is=islandzki
+it=w\u0142oski
+lt=litewski
+missing_space_after_comma=Po przecinku wstawiamy spacj\u0119
+ml=malayalam
+nl=niderlandzki
+no_space_after=Nie wstawiamy spacji po nawiasie otwieraj\u0105cym
+no_space_before=Nie wstawiamy spacji przed nawiasem zamykaj\u0105cym
+no_space_before_dot=Nie wstawiamy spacji przed kropk\u0105
+pl=polski
+repetition=Prawdopodobna liter\u00f3wka\: powt\u00f3rzony wyraz
+result1=<br><b> {0}. Wiersz {1}, kolumna {2}</b><br>
+resultAreaText=Miejsce na wyniki
+resultTime=<br>Czas\: {0}ms (w tym dopasowywanie regu\u0142\: {1}ms)<br>
+ru=rosyjski
+sk=s\u0142owacki
+sl=s\u0142owe\u0144ski
+space_after_comma=Spacj\u0119 wstawiamy po przecinku, nie przed przecinkiem
+startChecking=Sprawdzanie w j\u0119zyku\: {0}
+sv=szwedzki
+textLanguage=J\u0119zyk tekstu\:
+two_commas=Dwa przecinki
+two_dots=Dwie kropki
+uk=ukrai\u0144ski
+unpaired_brackets=Niesparowany cudzys\u0142\u00f3w, nawias itd.
+whitespace_repetition=Prawdopodobna liter\u00f3wka\: wiele spacji z rz\u0119du
+ro=rumu\u0144ski
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ro.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ro.properties
new file mode 100644
index 0000000..80f49e1
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ro.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Bielorus\u0103
+ca=Catalan\u0103
+category_case=Capitalizare
+category_false_friend=Prieteni fal\u0219i
+category_grammar=Gramatic\u0103
+category_misc=Diverse
+category_typo=Posibil\u0103 gre\u0219eal\u0103 de tastare
+checkDone=Verificare complet\u0103, s-au g\u0103sit {0} probleme probabile
+checkText=Verifi&c\u0103 textul
+correctionMessage=Corectare\:
+cs=Ceh\u0103
+da=Danez\u0103
+de=German\u0103
+desc_comma_whitespace=Spa\u021bii puse \u00eenainte de virgul\u0103 sau \u00eenainte/dup\u0103 paranteze
+desc_double_punct=S-au folosit dou\u0103 puncte sau virgule consecutive
+desc_repetition=Cuv\u00e2nt repetat (ex\: \u201evoi voi\u201d)
+desc_repetition_short=Cuv\u00e2nt repetat
+desc_unpaired_brackets=Acolade, paranteze, ghilimele sau alte simboluri similare desperecheate
+desc_uppercase_sentence=Verific\u0103 dac\u0103 propozi\u021bia \u00eencepe cu liter\u0103 mare
+desc_whitespacerepetition=Spa\u021biu repetat (formatare gre\u0219it\u0103)
+double_dots_short=Dou\u0103 puncte consecutive
+double_commas_short=Dou\u0103 virgule consecutive
+en=Englez\u0103
+enterText=V\u0103 rug\u0103m s\u0103 tasta\u021bi sau s\u0103 lipi\u021bi textul de verificat \u00een zona de sus
+enterText2=V\u0103 rug\u0103m s\u0103 insera\u021bi textul de verificat aici
+errorContext=Context\:
+errorMessage=Mesaj\:
+es=Spaniol\u0103
+false_friend=Prieten fals
+false_friend_desc=indiciu de prieten fals pentru\:
+false_friend_hint=Indiciu\: \u201e{0}\u201d ({1}) \u00eenseamn\u0103 {2} ({3}).
+false_friend_suggestion=A\u021bi vrut s\u0103 scrie\u021bi {0}?
+fr=Francez\u0103
+gl=Galician
+guiCancelButton=Renun\u021b\u0103
+guiCheckComplete=Verificarea f\u0103cut\u0103 de LanguageTool este complet\u0103.
+guiConfigWindowTitle=Op\u021biuni pentru LanguageTool
+guiDemoText=Acesta este un exemplu pentru a v\u0103 v\u0103 ar\u0103ta cum func\u021bioneaz\u0103 LanguageTool. Re\u021bine\u021bi c\u0103 nu include \u0219i verificare ortografic\u0103.
+guiMatchCount=Posibile erori\:
+guiMenuAbout=&Despre...
+guiMenuAddRules=\u00cencarc\u0103 un fi\u0219ier de &reguli
+guiMenuCheckClipboard=Verifi&c\u0103 textul din memorie
+guiMenuFile=&Fi\u0219ier
+guiMenuHelp=&Ajutor
+guiMenuHide=&Ascunde \u00een zona de notificare
+guiMenuOpen=&Deschide...
+guiMenuOptions=&Op\u021biuni...
+guiMenuQuit=&Ie\u0219ire
+guiMenuShowMainWindow=Deschide fereastra principal\u0103
+guiMotherTongue=Limba dumneavoastr\u0103 matern\u0103\:
+guiNoErrorsFound=Nu s-au g\u0103sit erori sau avertismente (limba {0})
+guiNoErrorsFoundSelectedText=Nu s-au g\u0103sit erori sau avertismente \u00een textul selectat (limba {0})
+guiOKButton=&OK
+guiOOoChangeButton=S&chimb\u0103
+guiOOoCloseButton=\u00cenchide
+guiOOoIgnoreAllButton=Ignor\u0103 tot
+guiOOoIgnoreButton=Ignor\u0103
+guiOOoOptionsButton=Op\u021biuni...
+guiProgressWindowTitle=LanguageTool\: Se verific\u0103 textul...
+guiReplaceWindowTitle=\u00cenlocuire text
+guiReplaceWithOtherText=<alt text>
+guiRunOnPort=Ruleaz\u0103 ca server pe po&rtul
+guiSelectionCheckComplete=Verificarea LanguageTool a textului selectat este complet\u0103.
+incorrect_case=Propozi\u021bia nu \u00eencepe cu liter\u0103 mare
+is=Islandez\u0103
+it=Italian\u0103
+lt=Lituanian\u0103
+missing_space_after_comma=Pune\u021bi un spa\u021biu dup\u0103 virgul\u0103
+ml=Malayalam
+nl=Olandez\u0103
+no_space_after=Nu pune\u021bi spa\u021biu dup\u0103 deschiderea parantezei
+no_space_before=Nu pune\u021bi spa\u021biu \u00eenainte de \u00eenchiderea parantezei
+no_space_before_dot=Nu pune\u021bi spa\u021biu \u00eenainte de punct
+pl=Polonez\u0103
+repetition=Posibil\u0103 gre\u0219eal\u0103\: a\u021bi repetat un cuv\u00e2nt
+result1=<br><b> {0}. Linia {1}, coloana {2}</b><br>
+resultAreaText=Rezultatele vor ap\u0103rea aici
+resultTime=<br>Durat\u0103\: {0}ms (inclusiv {1}ms pentru potrivirea regulilor)<br>
+ru=Rus\u0103
+sk=Slovac\u0103
+sl=Sloven\u0103
+space_after_comma=Pune un spa\u021biu dup\u0103 virgul\u0103, dar nu \u00eenainte de virgul\u0103
+startChecking=Verificarea \u00eencepe \u00een {0}
+sv=Suedez\u0103
+textLanguage=Limba textului\:
+two_commas=Dou\u0103 virgule consecutive
+two_dots=Dou\u0103 puncte consecutive
+uk=Ukrainian
+unpaired_brackets=Parantez\u0103 nepereche
+whitespace_repetition=Posibil\u0103 gre\u0219eal\u0103\: a\u021bi repetat un spa\u021biu
+ro=Rom\u00e2n\u0103
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ru.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ru.properties
new file mode 100644
index 0000000..5c27320
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_ru.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=\u0411\u0435\u043b\u043e\u0440\u0443\u0441\u0441\u043a\u0438\u0439
+ca=\u041a\u0430\u0442\u0430\u043b\u0430\u043d\u0441\u043a\u0438\u0439
+category_case=\u0417\u0430\u0433\u043b\u0430\u0432\u043d\u044b\u0435 \u0431\u0443\u043a\u0432\u044b
+category_false_friend=\u041e\u043c\u043e\u043d\u0438\u043c\u044b
+category_grammar=\u0413\u0440\u0430\u043c\u043c\u0430\u0442\u0438\u043a\u0430
+category_misc=\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438
+category_typo=\u041e\u043f\u0435\u0447\u0430\u0442\u043a\u0430
+checkDone=\u041f\u0440\u043e\u0432\u0435\u0440\u043a\u0430 \u0437\u0430\u043a\u043e\u043d\u0447\u0435\u043d\u0430, {0} \u043f\u043e\u0442\u0435\u043d\u0446\u0438\u0430\u043b\u044c\u043d\u044b\u0445 \u043e\u0448\u0438\u0431\u043e\u043a \u043d\u0430\u0439\u0434\u0435\u043d\u043e
+checkText=&\u041f\u0440\u043e\u0432\u0435\u0440\u0438\u0442\u044c \u0442\u0435\u043a\u0441\u0442
+correctionMessage=\u0418\u0441\u043f\u0440\u0430\u0432\u043b\u0435\u043d\u0438\u044f\:
+cs=\u0427\u0435\u0448\u0441\u043a\u0438\u0439
+da=\u0414\u0430\u0442\u0441\u043a\u0438\u0439
+de=\u041d\u0435\u043c\u0435\u0446\u043a\u0438\u0439
+desc_comma_whitespace=\u041f\u0440\u043e\u0431\u0435\u043b\u044b \u043f\u0435\u0440\u0435\u0434 \u0437\u0430\u043f\u044f\u0442\u043e\u0439 \u0438\u043b\u0438 \u043f\u0435\u0440\u0435\u0434/\u043f\u043e\u0441\u043b\u0435 \u0441\u043a\u043e\u0431\u043e\u043a
+desc_double_punct=\u0414\u0432\u0435 \u0437\u0430\u043f\u044f\u0442\u044b\u0435 \u0438\u043b\u0438 \u0442\u043e\u0447\u043a\u0438 \u043f\u043e\u0434\u0440\u044f\u0434
+desc_repetition=\u041f\u043e\u0432\u0442\u043e\u0440 \u0441\u043b\u043e\u0432 (\u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440\: \u201c\u043e\u043d \u043e\u043d\u201d)
+desc_repetition_short=\u041f\u043e\u0432\u0442\u043e\u0440 \u0441\u043b\u043e\u0432\u0430
+desc_unpaired_brackets=\u041d\u0435\u043f\u0430\u0440\u043d\u044b\u0435 \u0441\u043a\u043e\u0431\u043a\u0438 \u0438\u043b\u0438 \u0430\u043f\u043e\u0441\u0442\u0440\u043e\u0444\u044b
+desc_uppercase_sentence=\u041f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u0434\u043e\u043b\u0436\u043d\u043e \u043d\u0430\u0447\u0438\u043d\u0430\u0442\u044c\u0441\u044f \u0441 \u0437\u0430\u0433\u043b\u0430\u0432\u043d\u043e\u0439 \u0431\u0443\u043a\u0432\u044b
+desc_whitespacerepetition=\u041f\u043e\u0432\u0442\u043e\u0440 \u043f\u0440\u043e\u0431\u0435\u043b\u0430
+double_dots_short=\u0414\u0432\u0435 \u0442\u043e\u0447\u043a\u0438
+double_commas_short=\u0414\u0432\u0435 \u0437\u0430\u043f\u044f\u0442\u044b\u0435
+en=\u0410\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u0438\u0439
+enterText=\u041d\u0430\u043f\u0435\u0447\u0430\u0442\u0430\u0439\u0442\u0435 \u0438\u043b\u0438 \u0432\u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u0442\u0435\u043a\u0441\u0442 \u0434\u043b\u044f \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438
+enterText2=\u041d\u0430\u043f\u0435\u0447\u0430\u0442\u0430\u0439\u0442\u0435 \u0438\u043b\u0438 \u0432\u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u0442\u0435\u043a\u0441\u0442 \u0434\u043b\u044f \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438
+errorContext=\u041e\u0448\u0438\u0431\u043a\u0430 \u043a\u043e\u043d\u0442\u0435\u043a\u0441\u0442\u0430\:
+errorMessage=\u041e\u0448\u0438\u0431\u043a\u0430\:
+es=\u0418\u0441\u043f\u0430\u043d\u0441\u043a\u0438\u0439
+false_friend=\u041e\u043c\u043e\u043d\u0438\u043c\u044b
+false_friend_desc=\u0417\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u043e\u043c\u043e\u043d\u0438\u043c\u043e\u0432\:
+false_friend_hint=\u041f\u043e\u0434\u0441\u043a\u0430\u0437\u043a\u0430\: "{0}" ({1}) \u043e\u0437\u043d\u0430\u0447\u0430\u0435\u0442 {2} ({3}).
+false_friend_suggestion=\u041f\u0440\u0435\u0434\u043f\u043e\u043b\u0430\u0433\u0430\u0435\u0442\u0441\u044f {0}?
+fr=\u0424\u0440\u0430\u043d\u0446\u0443\u0437\u0441\u043a\u0438\u0439
+gl=\u0413\u0430\u043b\u0438\u0441\u0438\u0439\u0441\u043a\u0438\u0439
+guiCancelButton=\u041e\u0442\u043c\u0435\u043d\u0430
+guiCheckComplete=LanguageTool \u0437\u0430\u0432\u0435\u0440\u0448\u0438\u043b \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0443.
+guiConfigWindowTitle=\u041d\u0430\u0441\u0442\u0440\u043e\u0439\u043a\u0438 LanguageTool
+guiDemoText=\u042d\u0442\u043e \u043f\u0440\u0438\u043c\u0435\u0440 \u0440\u0430\u0431\u043e\u0442\u044b LanguageTool. \u041e\u043d \u043e\u043d \u0441\u043e\u0437\u0434\u0430\u043d \u0434\u043b\u044f \u043f\u043e\u0438\u0441\u043a\u0430 \u0433\u0440\u0430\u043c\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0445 \u043e\u0448\u0438\u0431\u043e\u043a..
+guiMatchCount=\u041f\u043e\u0442\u0435\u043d\u0446\u0438\u0430\u043b\u044c\u043d\u044b\u0435 \u043e\u0448\u0438\u0431\u043a\u0438\:
+guiMenuAbout=&\u041e...
+guiMenuAddRules=\u0417\u0430\u0433\u0440\u0443\u0437\u0438\u0442\u044c \u0444\u0430\u0439\u043b &\u041f\u0440\u0430\u0432\u0438\u043b
+guiMenuCheckClipboard=\u041f&\u0440\u043e\u0432\u0435\u0440\u0438\u0442\u044c \u0442\u0435\u043a\u0441\u0442 \u0432 \u0431\u0443\u0444\u0435\u0440\u0435 \u043e\u0431\u043c\u0435\u043d\u0430
+guiMenuFile=&\u0424\u0430\u0439\u043b
+guiMenuHelp=\u041f\u043e&\u043c\u043e\u0449\u044c
+guiMenuHide=&\u0421\u043f\u0440\u044f\u0442\u0430\u0442\u044c \u0432 \u0442\u0440\u0435\u0439
+guiMenuOpen=\u041e&\u0442\u043a\u0440\u044b\u0442\u044c...
+guiMenuOptions=\u041a\u043e&\u043d\u0444\u0438\u0433\u0443\u0440\u0430\u0446\u0438\u044f...
+guiMenuQuit=&\u0412\u044b\u0445\u043e\u0434
+guiMenuShowMainWindow=\u041e\u0442\u043a\u0440\u044b\u0442\u044c \u0433\u043b\u0430\u0432\u043d\u043e\u0435 \u043e\u043a\u043d\u043e
+guiMotherTongue=\u042f\u0437\u044b\u043a\:
+guiNoErrorsFound=\u041d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e \u043e\u0448\u0438\u0431\u043e\u043a \u0438 \u043f\u0440\u0435\u0434\u0443\u043f\u0440\u0435\u0436\u0434\u0435\u043d\u0438\u0439 (\u044f\u0437\u044b\u043a\: {0})
+guiNoErrorsFoundSelectedText=\u041d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u043e \u043e\u0448\u0438\u0431\u043e\u043a \u0438 \u043f\u0440\u0435\u0434\u0443\u043f\u0440\u0435\u0436\u0434\u0435\u043d\u0438\u0439 (\u044f\u0437\u044b\u043a\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=&\u0418\u0437\u043c\u0435\u043d\u0438\u0442\u044c
+guiOOoCloseButton=\u0417\u0430\u043a\u0440\u044b\u0442\u044c
+guiOOoIgnoreAllButton=\u0418\u0433\u043d\u043e\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u0432\u0441\u0451
+guiOOoIgnoreButton=\u0418\u0433\u043d\u043e\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c
+guiOOoOptionsButton=\u041e\u043f\u0446\u0438\u0438...
+guiProgressWindowTitle=LanguageTool\: \u041f\u0440\u043e\u0432\u0435\u0440\u043a\u0430 \u0442\u0435\u043a\u0441\u0442\u0430...
+guiReplaceWindowTitle=\u0417\u0430\u043c\u0435\u043d\u0438\u0442\u044c \u0442\u0435\u043a\u0441\u0442
+guiReplaceWithOtherText=<\u043f\u0440\u043e\u0447\u0438\u0439 \u0442\u0435\u043a\u0441\u0442>
+guiRunOnPort=\u0417\u0430\u043f&\u0443\u0441\u0442\u0438\u0442\u044c \u043a\u0430\u043a \u0441\u0435\u0442\u0435\u0432\u043e\u0439 \u0441\u0435\u0440\u0432\u0435\u0440. \u041f\u043e\u0440\u0442\:
+guiSelectionCheckComplete=LanguageTool \u0437\u0430\u0432\u0435\u0440\u0448\u0438\u043b \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0443 \u0432\u044b\u0434\u0435\u043b\u0435\u043d\u043d\u043e\u0433\u043e \u0442\u0435\u043a\u0441\u0442\u0430.
+incorrect_case=\u042d\u0442\u043e \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u0435 \u043d\u0435 \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u0442\u0441\u044f \u0441 \u0437\u0430\u0433\u043b\u0430\u0432\u043d\u043e\u0439 \u0431\u0443\u043a\u0432\u044b
+is=\u0418\u0441\u043b\u0430\u043d\u0434\u0441\u043a\u0438\u0439
+it=\u0418\u0442\u0430\u043b\u044c\u044f\u043d\u0441\u043a\u0438\u0439
+lt=\u041b\u0438\u0442\u043e\u0432\u0441\u043a\u0438\u0439
+missing_space_after_comma=\u041f\u043e\u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u043f\u0440\u043e\u0431\u0435\u043b \u043f\u043e\u0441\u043b\u0435 \u0437\u0430\u043f\u044f\u0442\u043e\u0439
+ml=\u041c\u0430\u043b\u0430\u044f\u043b\u0430\u043c
+nl=\u041d\u0438\u0434\u0435\u0440\u043b\u0430\u043d\u0434\u0441\u043a\u0438\u0439
+no_space_after=\u041d\u0435 \u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u043f\u0440\u043e\u0431\u0435\u043b \u043f\u043e\u0441\u043b\u0435 \u043e\u0442\u043a\u0440\u044b\u0432\u0430\u044e\u0449\u0435\u0439\u0441\u044f \u0441\u043a\u043e\u0431\u043a\u0438
+no_space_before=\u041d\u0435 \u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u043f\u0440\u043e\u0431\u0435\u043b \u0434\u043e \u0437\u0430\u043a\u0440\u044b\u0432\u0430\u044e\u0449\u0435\u0439\u0441\u044f \u0441\u043a\u043e\u0431\u043a\u0438
+no_space_before_dot=\u041d\u0435 \u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u043f\u0440\u043e\u0431\u0435\u043b \u043f\u0435\u0440\u0435\u0434 \u0442\u043e\u0447\u043a\u043e\u0439 \u0432 \u043a\u043e\u043d\u0446\u0435 \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0435\u043d\u0438\u044f
+pl=\u041f\u043e\u043b\u044c\u0441\u043a\u0438\u0439
+repetition=\u0412\u043e\u0437\u043c\u043e\u0436\u043d\u0430\u044f \u043e\u043f\u0435\u0447\u0430\u0442\u043a\u0430\: \u043f\u043e\u0432\u0442\u043e\u0440 \u0441\u043b\u043e\u0432\u0430
+result1=<br><b> {0}. \u0421\u0442\u0440\u043e\u043a\u0430 {1}, \u0421\u0442\u043e\u043b\u0431\u0435\u0446 {2}</b><br>
+resultAreaText=\u0420\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438 \u0431\u0443\u0434\u0435\u0442 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d \u0437\u0434\u0435\u0441\u044c
+resultTime=<br>\u0412\u0440\u0435\u043c\u044f\: {0}ms (\u0432\u043a\u043b\u044e\u0447\u0430\u044f {1}ms \u0434\u043b\u044f \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u043f\u0440\u0430\u0432\u0438\u043b)<br>
+ru=\u0420\u0443\u0441\u0441\u043a\u0438\u0439
+sk=\u0421\u043b\u043e\u0432\u0430\u0446\u043a\u0438\u0439
+sl=\u0421\u043b\u043e\u0432\u0435\u043d\u0441\u043a\u0438\u0439
+space_after_comma=\u041f\u043e\u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u043f\u0440\u043e\u0431\u0435\u043b \u043f\u043e\u0441\u043b\u0435 \u0437\u0430\u043f\u044f\u0442\u043e\u0439, \u0430 \u043d\u0435 \u043f\u0435\u0440\u0435\u0434 \u043d\u0435\u0439
+startChecking=\u041d\u0430\u0447\u0430\u043b\u043e \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438. \u042f\u0437\u044b\u043a\: {0}
+sv=\u0428\u0432\u0435\u0434\u0441\u043a\u0438\u0439
+textLanguage=\u042f\u0437\u044b\u043a \u0442\u0435\u043a\u0441\u0442\u0430\:
+two_commas=\u0414\u0432\u0435 \u0437\u0430\u043f\u044f\u0442\u044b\u0435 \u043f\u043e\u0434\u0440\u044f\u0434
+two_dots=\u0414\u0432\u0435 \u0442\u043e\u0447\u043a\u0438 \u043f\u043e\u0434\u0440\u044f\u0434
+uk=\u0423\u043a\u0440\u0430\u0438\u043d\u0441\u043a\u0438\u0439
+unpaired_brackets=\u041d\u0435\u043f\u0430\u0440\u043d\u0430\u044f \u0441\u043a\u043e\u0431\u043a\u0430, \u0430\u043f\u043e\u0441\u0442\u0440\u043e\u0444 \u0438\u043b\u0438 \u043a\u0430\u0432\u044b\u0447\u043a\u0430
+whitespace_repetition=\u041f\u043e\u0432\u0442\u043e\u0440 \u043f\u0440\u043e\u0431\u0435\u043b\u0430
+ro=\u0420\u0443\u043c\u044b\u043d\u0441\u043a\u0438\u0439
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sk.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sk.properties
new file mode 100644
index 0000000..6a71621
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sk.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Bielorusky
+ca=Katal\u00e1nsky
+category_case=Ve\u013ek\u00e9 a mal\u00e9 p\u00edsmen\u00e1
+category_false_friend=Falo\u0161n\u00ed priatelia
+category_grammar=Gramatika
+category_misc=R\u00f4zne
+category_typo=Mo\u017en\u00fd preklep
+checkDone=Kontrola je ukon\u010den\u00e1, {0} n\u00e1jden\u00fdch potenci\u00e1lnych ch\u00fdb.
+checkText=S&kontrolova\u0165
+correctionMessage=Oprava\:
+cs=\u010cesky
+da=D\u00e1nsky
+de=Nemecky
+desc_comma_whitespace=Pou\u017eitie medzery pred \u010diarkou a pred/za z\u00e1tvorkami
+desc_double_punct=Pou\u017eitie dvoch za sebou id\u00facich bodiek alebo \u010diarok
+desc_repetition=Opakovanie slov (napr. 'bude bude')
+desc_repetition_short=Opakovanie slov
+desc_unpaired_brackets=Nesp\u00e1rovan\u00e9 z\u00e1tvorky, \u00favodzovky alebo podobn\u00e9 symboly
+desc_uppercase_sentence=Skontrolujte, \u010di veta za\u010d\u00edna ve\u013ek\u00fdmi po\u010diato\u010dn\u00fdmi p\u00edsmenami
+desc_whitespacerepetition=Opakovanie "bielych znakov" napr. medzier (zl\u00e9 form\u00e1tovanie)
+double_dots_short=Dve za sebou id\u00face bodky
+double_commas_short=Dve za sebou id\u00face \u010diarky
+en=Anglicky
+enterText=Vlo\u017ete alebo nap\u00ed\u0161te text, ktor\u00fd chcete skontrolova\u0165 do hornej \u010dasti
+enterText2=Sem vlo\u017ete po\u017eadovan\u00fd text
+errorContext=Kontext\:
+errorMessage=Spr\u00e1va\:
+es=\u0160panielsky
+false_friend=Falo\u0161n\u00fd priate\u013e
+false_friend_desc=falo\u0161n\u00fd priate\u013e, tip\:\:
+false_friend_hint=Tip\: "{0}" ({1}) znamen\u00e1 {2} ({3}).
+false_friend_suggestion=M\u00e1te na mysli {0}?
+fr=Franc\u00fazsky
+gl=Gal\u00edcky
+guiCancelButton=Zru\u0161i\u0165
+guiCheckComplete=LanguageTool kontrola bola ukon\u010den\u00e1.
+guiConfigWindowTitle=LanguageTool Nastavenia
+guiDemoText=Toto je uk\u00e1\u017ekov\u00fd vstup, na predvedenie funk\u010dnosti LanguageTool. Pam\u00e4tajte si si, \u017ee neobsahuje "kontrolu" preklepo.
+guiMatchCount=Potenci\u00e1lne chyby\:
+guiMenuAbout=&O programe...
+guiMenuAddRules=Na\u010d\u00edta\u0165 s\u00fabor s p&ravidlami
+guiMenuCheckClipboard=S&kontrolova\u0165 text v schr\u00e1nke (clipboard)
+guiMenuFile=&S\u00fabor
+guiMenuHelp=&Pomocn\u00edk
+guiMenuHide=Skry\u0165 do syst\u00e9movej &li\u0161ty
+guiMenuOpen=&Otvori\u0165...
+guiMenuOptions=Nastaveni&a...
+guiMenuQuit=&Koniec
+guiMenuShowMainWindow=Otvori\u0165 hlavn\u00e9 okno
+guiMotherTongue=V\u00e1\u0161 rodn\u00fd jazyk\:
+guiNoErrorsFound=Bez ch\u00fdb alebo varovan\u00ed (jazyk\: {0})
+guiNoErrorsFoundSelectedText=Neboli n\u00e1jden\u00e9 chyby alebo varovania pre zvolen\u00fd text (jazyk\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=&Zmeni\u0165
+guiOOoCloseButton=Zatvori\u0165
+guiOOoIgnoreAllButton=Ignorova\u0165 v\u0161etko
+guiOOoIgnoreButton=Ignorova\u0165
+guiOOoOptionsButton=Nastavenia...
+guiProgressWindowTitle=LanguageTool\: kontrolujem text...
+guiReplaceWindowTitle=Nahradi\u0165 text
+guiReplaceWithOtherText=<in\u00fd text>
+guiRunOnPort=Spusti\u0165 ako server na po&rte
+guiSelectionCheckComplete=LanguageTool kontrola ozna\u010den\u00e9ho textu je dokon\u010den\u00e1.
+incorrect_case=T\u00e1to veta neza\u010d\u00edna s ve\u013ek\u00fdm p\u00edsmenom
+is=Islandsky
+it=Taliansky
+lt=Litovsk\u00fd
+missing_space_after_comma=Vlo\u017ete medzeru za \u010diarku
+ml=Malaj\u00e1lamsky
+nl=Holandsky
+no_space_after=Nevlo\u017ei\u0165 medzeru za otv\u00e1raciu z\u00e1tvorku
+no_space_before=Nevlo\u017ei\u0165 medzeru pred zatv\u00e1raciu z\u00e1tvorku
+no_space_before_dot=Nevlo\u017ei\u0165 medzeru pred bodku
+pl=Po\u013esky
+repetition=Mo\u017en\u00fd preklep\: zopakovali ste slovo
+result1=<br><b> {0}. Riadok {1}, st\u013apec {2}</b><br>
+resultAreaText=Tu sa zobraz\u00ed v\u00fdsledok
+resultTime=<br>\u010cas\: {0}ms (vr\u00e1tane {1}ms na pou\u017eitie pravidiel)<br>
+ru=Rusky
+sk=Slovensky
+sl=Slovinsky
+space_after_comma=Vlo\u017ei\u0165 medzeru za \u010diarku, ale nie pred \u010diarku
+startChecking=Za\u010diatok kontroly po {0}
+sv=\u0160v\u00e9dsky
+textLanguage=Jazyk textu\:
+two_commas=Dve po sebe id\u00face \u010diarky
+two_dots=Dve po sebe id\u00face bodky
+uk=Ukrajinsky
+unpaired_brackets=Nevyp\u00e1rovan\u00e9 z\u00e1tvorky alebo podobn\u00fd symbol
+whitespace_repetition=Mo\u017en\u00fd preklep\: zopakovali ste "biely znak" (whitespace)
+ro=Rumunsky
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sl.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sl.properties
new file mode 100644
index 0000000..ff90555
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sl.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=beloruski
+ca=katalonski
+category_case=Velike za\u010detnice
+category_false_friend=La\u017eni prijatelji
+category_grammar=Slovnica
+category_misc=Razno
+category_typo=Mo\u017ena tipkarska napaka
+checkDone=Preverjanje kon\u010dano, najdenih {0} potencialnih te\u017eav
+checkText=&Preveri besedilo
+correctionMessage=Popravek\:
+cs=\u010de\u0161ki
+da=danski
+de=nem\u0161ki
+desc_comma_whitespace=Uporaba presledka, tabulatorja ali preloma vrstice pred vejico in pred/po oklepaju
+desc_double_punct=Uporaba dveh zaporednih pik ali vejic
+desc_repetition=Podvojena beseda (npr. 'bo bo')
+desc_repetition_short=Podvojena beseda
+desc_unpaired_brackets=Neparni oklepaji, zaviti oklepaji, narekovaji in podobni znaki
+desc_uppercase_sentence=Preveri, da se poved za\u010dne z veliko za\u010detnico
+desc_whitespacerepetition=Ponovljen presledek (nepravilno oblikovanje)
+double_dots_short=Zaporedni piki
+double_commas_short=Zaporedni vejici
+en=angle\u0161ki
+enterText=Prosimo, vnesite ali prilepite besedilo za preverjanje v zgornje podro\u010dje
+enterText2=Prosimo, vnesite besedilo za preverjanje semkaj
+errorContext=Kontekst\:
+errorMessage=Sporo\u010dilo\:
+es=\u0161panski
+false_friend=La\u017eni prijatelj
+false_friend_desc=namig o napa\u010dnem prijatelju za\:
+false_friend_hint=Namig\: "{0}" ({1}) pomeni {2} ({3}).
+false_friend_suggestion=Ste imeli v mislih {0}?
+fr=francoski
+gl=galicijski
+guiCancelButton=Prekli\u010di
+guiCheckComplete=Preverjanje z LanguageTool je dokon\u010dano.
+guiConfigWindowTitle=Mo\u017enosti LanguageTool
+guiDemoText=To je primer vnosa za potrebe prikaza delovanja orodja LanguageTool. Upo\u0161tevajte, da ne vklju\u010duje preverjanja \u010drkovanja.
+guiMatchCount=Mo\u017ene napake\:
+guiMenuAbout=&O raz\u0161iritvi ...
+guiMenuAddRules=Nalo\u017ei &datoteko s pravili
+guiMenuCheckClipboard=&Preveri besedilo na odlo\u017ei\u0161\u010du
+guiMenuFile=&Datoteka
+guiMenuHelp=Po&mo\u010d
+guiMenuHide=&Skrij v sistemski pladenj
+guiMenuOpen=&Odpri ...
+guiMenuOptions=&Mo\u017enosti ...
+guiMenuQuit=I&zhod
+guiMenuShowMainWindow=Odpri glavno okno
+guiMotherTongue=Va\u0161 materni jezik\:
+guiNoErrorsFound=Ni najdenih napak ali opozoril (jezik\: {0})
+guiNoErrorsFoundSelectedText=V izbranem besedilu ni najdenih napak ali opozoril (jezik\: {0})
+guiOKButton=V &redu
+guiOOoChangeButton=&Spremeni
+guiOOoCloseButton=Zapri
+guiOOoIgnoreAllButton=Prezri vse
+guiOOoIgnoreButton=Prezri
+guiOOoOptionsButton=Mo\u017enosti ...
+guiProgressWindowTitle=LanguageTool\: preverjanje besedila ...
+guiReplaceWindowTitle=Zamenjaj besedilo
+guiReplaceWithOtherText=<drugo besedilo>
+guiRunOnPort=Po\u017eeni kot stre\u017enik na v&ratih
+guiSelectionCheckComplete=Preverjanje izbranega besedila z LanguageTool je dokon\u010dano.
+incorrect_case=Ta poved se ne za\u010denja z veliko za\u010detnico
+is=islandski
+it=italijanski
+lt=litovski
+missing_space_after_comma=Po vejici vstavi presledek
+ml=malajalamski
+nl=nizozemski
+no_space_after=Ne postavljaj presledka za oklepaj
+no_space_before=Ne postavljaj presledka pred zaklepaj
+no_space_before_dot=Ne postavljaj presledka pred piko
+pl=poljski
+repetition=Mo\u017ena tipkarska napaka\: ponovili ste besedo
+result1=<br><b> {0}. Vrstica {1}, stolpec {2}</b><br>
+resultAreaText=Tukaj se bodo izpisali rezultati
+resultTime=<br>\u010cas\: {0}ms (vklju\u010dno z {1}ms za ujemanje pravil)<br>
+ru=ruski
+sk=slova\u0161ki
+sl=slovenski
+space_after_comma=Presledek vstavi po vejici, ne pa pred vejico
+startChecking=Za\u010detek preverjanja v {0}
+sv=\u0161vedski
+textLanguage=Jezik besedila\:
+two_commas=Dve zaporedni vejici
+two_dots=Dve zaporedni piki
+uk=ukrajinski
+unpaired_brackets=Neparni oklepaji ali podobni znaki
+whitespace_repetition=Mo\u017ena tipkarska napaka\: ponovili ste presledek
+ro=romunski
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sv.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sv.properties
new file mode 100644
index 0000000..796aabf
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_sv.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+eo=Esperanto
+be=Belarusian
+ca=Catalan
+category_case=Stor eller liten bokstav
+category_false_friend=Falska v\u00e4nner
+category_grammar=Grammatik
+category_misc=\u00d6vrigt
+category_typo=Eventuellt ett stavfel
+checkDone=Kontroll utf\u00f6rd, Hittade {0} potentiella problem
+checkText=&Kontrollera Text
+correctionMessage=R\u00e4ttelse\:
+cs=Tjeckiska
+da=Danish
+de=Tyska
+desc_comma_whitespace=Anv\u00e4ndande av blanksteg f\u00f6re kommatecken eller f\u00f6re/efter parentes
+desc_double_punct=Anv\u00e4ndande av dubbla punkter eller kommatecken
+desc_repetition=Repetition av ord (exempelvis 'till till')
+desc_repetition_short=Repetition av ord
+desc_unpaired_brackets=Icke ihopparade parenteser, citattecken eller liknande symboler
+desc_uppercase_sentence=Kontrollera att meningar b\u00f6rjar med stor bokstav
+desc_whitespacerepetition=Upprepade mellanslag (d\u00e5lig formateringsvana)
+double_dots_short=Two consecutive dots
+double_commas_short=Two consecutive commas
+en=Engelska
+enterText=Skriv eller klistra in text i \u00f6vre f\u00e4ltet
+enterText2=L\u00e4gg in text f\u00f6r kontroll h\u00e4r
+errorContext=Sammanhang\:
+errorMessage=Meddelande\:
+es=Spanska
+false_friend=False friend
+false_friend_desc=Hint om falska v\u00e4nner\:
+false_friend_hint=Hint\: "{0}" ({1}) betyder {2} ({3}).
+false_friend_suggestion=Menade du {0}?
+fr=Franska
+gl=Galician
+guiCancelButton=Avbryt
+guiCheckComplete=LanguageTool kontroll \u00e4r f\u00e4rdig.
+guiConfigWindowTitle=LanguageTool Alternativ
+guiDemoText=Detta \u00e4r en en exempeltext f\u00f6r att visa hur LanguageTool fungerar.. Notera att den inte inneh\u00e5lle n\u00e5gon stavningskontrol.
+guiMatchCount=M\u00f6jliga fel\:
+guiMenuAbout=&Om...
+guiMenuAddRules=Ladda in &Regelfil
+guiMenuCheckClipboard=&Kontrollera texten i Urklipp
+guiMenuFile=&Arkiv
+guiMenuHelp=&Hj\u00e4lp
+guiMenuHide=&G\u00f6m i systemf\u00e4ltet
+guiMenuOpen=&\u00d6ppna...
+guiMenuOptions=&Alternativ...
+guiMenuQuit=&Avsluta
+guiMenuShowMainWindow=\u00d6ppna huvudf\u00f6nster
+guiMotherTongue=Ditt modersm\u00e5l\:
+guiNoErrorsFound=Hittade inga fel eller varningar (spr\u00e5k\: {0})
+guiNoErrorsFoundSelectedText=Hittade inga fel eller varningar i markerad text (spr\u00e5k\: {0})
+guiOKButton=&OK
+guiOOoChangeButton=&\u00c4ndra
+guiOOoCloseButton=St\u00e4ng
+guiOOoIgnoreAllButton=Ignorera Alla
+guiOOoIgnoreButton=Ignorera
+guiOOoOptionsButton=Alternativ...
+guiProgressWindowTitle=LanguageTool\: Kontrollerar text...
+guiReplaceWindowTitle=Ers\u00e4tt text
+guiReplaceWithOtherText=<annan text>
+guiRunOnPort=K\u00f6r server p\u00e5 po&rt
+guiSelectionCheckComplete=LanguageTool kontroll av markerad text \u00e4r utf\u00f6rd.
+incorrect_case=Denna mening b\u00f6rjar med liten bokstav
+is=Icelandic
+it=Italienska
+lt=Litauiska
+missing_space_after_comma=L\u00e4gg till ett blanksteg efter kommatecknet
+ml=Malayalam
+nl=Holl\u00e4ndska
+no_space_after=Ta bort blanksteg efter \u00f6ppnande parentesen
+no_space_before=Ta bort blanksteg f\u00f6re avslutande parentesen
+no_space_before_dot=Don't put a space before the full stop
+pl=Polska
+repetition=M\u00f6jlig felskrivning\: du repeterade ett ord
+result1=<br><b> {0}. Rad {1}, kolumn {2}</b><br>
+resultAreaText=Resultatet visas h\u00e4r
+resultTime=<br>Tid\: {0}ms (inklusive {1}ms f\u00f6r regelmatching)<br>
+ru=Ryska
+sk=Slovakiska
+sl=Slovenska
+space_after_comma=L\u00e4gg till ett mellanrum efter kommatecknet, men inte f\u00f6re
+startChecking=P\u00e5b\u00f6rjar kontroll om {0}
+sv=Svenska
+textLanguage=Textens spr\u00e5k\:
+two_commas=Dubbla kommatecken
+two_dots=Dubbla punkter
+uk=Ukrainska
+unpaired_brackets=Icke ihopparade parenteser eller liknande symboler
+whitespace_repetition=M\u00f6jlig felskrivning\: du har gjort upprepade mellanslag.
+ro=Rom\u00e2n\u0103
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_uk.properties b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_uk.properties
new file mode 100644
index 0000000..67704db
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/MessagesBundle_uk.properties
@@ -0,0 +1,93 @@
+#X-Generator: crowdin.net
+be=Belarusian
+ca=Catalan
+category_case=\u0412\u0435\u043b\u0438\u043a\u0456 \u043b\u0456\u0442\u0435\u0440\u0438
+category_false_friend=\u0424\u0430\u043b\u044c\u0448\u0438\u0432\u0456 \u0434\u0440\u0443\u0437\u0456
+category_grammar=\u0413\u0440\u0430\u043c\u0430\u0442\u0438\u043a\u0430
+category_misc=\u0406\u043d\u0448\u0435
+category_typo=\u041c\u043e\u0436\u043b\u0438\u0432\u0430 \u043c\u0435\u0445\u0430\u043d\u0456\u0447\u043d\u0430 \u043f\u043e\u043c\u0438\u043b\u043a\u0430
+checkDone=\u041f\u0435\u0440\u0435\u0432\u0456\u0440\u043a\u0443 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043e, \u0437\u043d\u0430\u0439\u0434\u0435\u043d\u043e {0} \u043f\u043e\u0442\u0435\u043d\u0446\u0456\u0430\u043b\u044c\u043d\u0438\u0445 \u043f\u043e\u043c\u0438\u043b\u043e\u043a
+checkText=\u041f\u0435\u0440\u0435\u0432\u0456\u0440\u0438\u0442\u0438 \u0442\u0435\u043a\u0441\u0442
+correctionMessage=\u0412\u0438\u043f\u0440\u0430\u0432\u043b\u0435\u043d\u043d\u044f\:
+cs=\u0427\u0435\u0441\u044c\u043a\u0430
+da=Danish
+de=\u041D\u0456\u043C\u0435\u0446\u044C\u043A\u0430
+desc_comma_whitespace=\u0412\u0436\u0438\u0432\u0430\u043D\u043D\u044F \u043F\u0440\u043E\u0433\u0430\u043B\u0438\u043D\u0438 \u043F\u0435\u0440\u0435\u0434 \u043A\u043E\u043C\u043E\u044E \u0442\u0430 \u043F\u0435\u0440\u0435\u0434/\u043F\u0456\u0441\u043B\u044F \u0434\u0443\u0436\u043E\u043A
+desc_double_punct=\u0412\u0436\u0438\u0432\u0430\u043D\u043D\u044F \u043F\u043E\u0434\u0432\u0456\u0439\u043D\u0438\u0445 \u043A\u043E\u043C, \u043A\u0440\u0430\u043F\u043E\u043A...
+desc_repetition=\u041F\u043E\u0432\u0442\u043E\u0440\u0435\u043D\u043D\u044F \u0441\u043B\u0456\u0432 (\u043D\u0430\u043F\u0440., '\u0431\u0443\u0434\u0435 \u0431\u0443\u0434\u0435')
+desc_repetition_short=\u041F\u043E\u0432\u0442\u043E\u0440\u0435\u043D\u043D\u044F \u0441\u043B\u0456\u0432
+desc_unpaired_brackets=\u041D\u0435\u043F\u0430\u0440\u043D\u0456 \u0434\u0443\u0436\u043A\u0438, \u043B\u0430\u043F\u043A\u0438 \u0430\u0431\u043E \u0456\u043D\u0448\u0456 \u0441\u0445\u043E\u0436\u0456 \u0441\u0438\u043C\u0432\u043E\u043B\u0438
+desc_uppercase_sentence=\u041F\u0435\u0440\u0435\u0432\u0456\u0440\u044F\u0454, \u0447\u0438 \u0440\u0435\u0447\u0435\u043D\u043D\u044F \u043F\u043E\u0447\u0438\u043D\u0430\u0454\u0442\u044C\u0441\u044F \u0437 \u0432\u0435\u043B\u0438\u043A\u043E\u0457 \u043B\u0456\u0442\u0435\u0440\u0438
+desc_whitespacerepetition=\u041F\u043E\u0432\u0442\u043E\u0440 \u043F\u0440\u043E\u0431\u0456\u043B\u0443
+double_dots_short=\u041F\u043E\u0434\u0432\u0456\u0439\u043D\u0430 \u043A\u0440\u0430\u043F\u043A\u0430
+double_commas_short=\u041F\u043E\u0434\u0432\u0456\u0439\u043D\u0430 \u043A\u043E\u043C\u0430
+en=\u0410\u043D\u0433\u043B\u0456\u0439\u0441\u044C\u043A\u0430
+enterText=\u0412\u0432\u0435\u0434\u0456\u0442\u044C \u0430\u0431\u043E \u0432\u0441\u0442\u0430\u0432\u0442\u0435 \u0442\u0435\u043A\u0441\u0442 \u0434\u043B\u044F \u043F\u0435\u0440\u0435\u0432\u0456\u0440\u044F\u043D\u043D\u044F \u0432\u0433\u043E\u0440\u0456
+enterText2=\u0412\u0441\u0442\u0430\u0432\u0442\u0435 \u0442\u0435\u043A\u0441\u0442 \u0434\u043B\u044F \u043F\u0435\u0440\u0435\u0432\u0456\u0440\u044F\u043D\u043D\u044F \u0432\u0433\u043E\u0440\u0456
+eo = \u0415\u0441\u043F\u0435\u0440\u0430\u043D\u0442\u043E
+errorContext=\u041A\u043E\u043D\u0442\u0435\u043A\u0441\u0442\:
+errorMessage=\u041F\u043E\u0432\u0456\u0434\u043E\u043C\u043B\u0435\u043D\u043D\u044F\:
+es=\u0406\u0441\u043F\u0430\u043D\u0441\u044C\u043A\u0430
+false_friend=\u041E\u043C\u043E\u043D\u0456\u043C\u0438
+false_friend_desc=\u043F\u0456\u0434\u043A\u0430\u0437\u043A\u0430 \u043D\u0435\u043F\u0440\u0430\u0432\u0438\u043B\u044C\u043D\u043E\u0457 \u043F\u0430\u0440\u0438 \u0434\u043B\u044F\:
+false_friend_hint=\u041F\u0456\u0434\u043A\u0430\u0437\u043A\u0430\: "{0}" ({1}) \u043E\u0437\u043D\u0430\u0447\u0430\u0454 {2} ({3}).
+false_friend_suggestion=\u0412\u0438 \u043C\u0430\u043B\u0438 \u043D\u0430 \u0443\u0432\u0430\u0437\u0456 {0}?
+fr=\u0424\u0440\u0430\u043D\u0446\u0443\u0437\u044C\u043A\u0430
+gl=Galician
+guiCancelButton=\u0421\u043a\u0430\u0441\u0443\u0432\u0430\u0442\u0438
+guiCheckComplete=\u041f\u0435\u0440\u0435\u0432\u0456\u0440\u043a\u0443 \u0432 LanguageTool \u0437\u0430\u043a\u0456\u043d\u0447\u0435\u043d\u043e.
+guiConfigWindowTitle=\u041f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u0438 LanguageTool
+guiDemoText=\u0426\u0435 \u043f\u0440\u0438\u043a\u043b\u0430\u0434 \u0432\u0445\u0456\u0434\u043d\u043e\u0433\u043e \u0442\u0435\u043a\u0441\u0442\u0443 \u0434\u043b\u044f \u0434\u0435\u043c\u043e\u043d\u0441\u0442\u0440\u0430\u0446\u0456\u0457 \u0440\u043e\u0431\u043e\u0442\u0438 LanguageTool. \u0417\u0430\u0443\u0432\u0430\u0436\u0442\u0435, \u0449\u043e \u0446\u0435 \u043d\u0435 \u0432\u043a\u043b\u044e\u0447\u0430\u0454 \u043f\u0435\u0440\u0435\u0432\u0456\u0440\u043a\u0443 \u043e\u0440\u0444\u043e\u0433\u0440\u0430\u0444\u0456\u0457.
+guiMatchCount=\u041f\u043e\u0442\u0435\u043d\u0446\u0456\u0439\u043d\u0438\u0445 \u043f\u043e\u043c\u0438\u043b\u043e\u043a\:
+guiMenuAbout=\u041f\u0440\u043e...
+guiMenuAddRules=Load Rule File
+guiMenuCheckClipboard=\u041f\u0435\u0440\u0435\u0432\u0456\u0440\u0438\u0442\u0438 \u0442\u0435\u043a\u0441\u0442 \u0437 \u043a\u0438\u0448\u0435\u043d\u0456
+guiMenuFile=\u0424\u0430\u0439\u043b
+guiMenuHelp=\u0414\u043e\u0432\u0456\u0434\u043a\u0430
+guiMenuHide=\u0421\u0445\u043e\u0432\u0430\u0442\u0438 \u0432 \u0441\u0438\u0441\u0442\u0435\u043c\u043d\u0438\u0439 \u043b\u043e\u0442\u043e\u043a
+guiMenuOpen=\u0412\u0456\u0434\u043a\u0440\u0438\u0442\u0438...
+guiMenuOptions=\u041f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u0438...
+guiMenuQuit=\u0412\u0438\u0439\u0442\u0438
+guiMenuShowMainWindow=Open main window
+guiMotherTongue=\u0412\u0430\u0448\u0430 \u0440\u0456\u0434\u043d\u0430 \u043c\u043e\u0432\u0430\:
+guiNoErrorsFound=\u041d\u0435 \u0437\u043d\u0430\u0439\u0434\u0435\u043d\u043e \u0436\u043e\u0434\u043d\u043e\u0457 \u043f\u043e\u043c\u0438\u043b\u043a\u0438 \u0430\u0431\u043e \u043f\u043e\u043f\u0435\u0440\u0435\u0434\u0436\u0435\u043d\u043d\u044f (\u043c\u043e\u0432\u0430\: {0})
+guiNoErrorsFoundSelectedText=\u041d\u0435 \u0437\u043d\u0430\u0439\u0434\u0435\u043d\u043e \u0436\u043e\u0434\u043d\u043e\u0457 \u043f\u043e\u043c\u0438\u043b\u043a\u0438 \u0430\u0431\u043e \u043f\u043e\u043f\u0435\u0440\u0435\u0434\u0436\u0435\u043d\u043d\u044f \u0443 \u0432\u0438\u0431\u0440\u0430\u043d\u043e\u043c\u0443 \u0442\u0435\u043a\u0441\u0442\u0456 (\u043c\u043e\u0432\u0430\: {0})
+guiOKButton=\u0413\u0430\u0440\u0430\u0437\u0434
+guiOOoChangeButton=\u0417\u043c\u0456\u043d\u0438\u0442\u0438
+guiOOoCloseButton=\u0417\u0430\u043a\u0440\u0438\u0442\u0438
+guiOOoIgnoreAllButton=\u041f\u0440\u043e\u043f\u0443\u0441\u0442\u0438\u0442\u0438 \u0432\u0441\u0456
+guiOOoIgnoreButton=\u041f\u0440\u043e\u043f\u0443\u0441\u0442\u0438\u0442\u0438
+guiOOoOptionsButton=\u041f\u0430\u0440\u0430\u043c\u0435\u0442\u0440\u0438...
+guiProgressWindowTitle=LanguageTool\: \u0442\u0435\u043a\u0441\u0442 \u043f\u0435\u0440\u0435\u0432\u0456\u0440\u044f\u0454\u0442\u044c\u0441\u044f...
+guiReplaceWindowTitle=\u0417\u0430\u043c\u0456\u043d\u0438\u0442\u0438 \u0442\u0435\u043a\u0441\u0442
+guiReplaceWithOtherText=<\u0456\u043d\u0448\u0438\u0439 \u0442\u0435\u043a\u0441\u0442>
+guiRunOnPort=\u0417\u0430\u043f\u0443\u0441\u0442\u0438\u0442\u0438, \u044f\u043a \u0441\u0435\u0440\u0432\u0435\u0440 \u043d\u0430 \u043f\u043e\u0440\u0442\u0443
+guiSelectionCheckComplete=\u041f\u0435\u0440\u0435\u0432\u0456\u0440\u044f\u043d\u043d\u044f LanguageTool \u0432\u0438\u0431\u0440\u0430\u043d\u043e\u0433\u043e \u0442\u0435\u043a\u0441\u0442\u0443 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043e.
+incorrect_case=\u0426\u0435 \u0440\u0435\u0447\u0435\u043d\u043d\u044f \u043d\u0435 \u043f\u043e\u0447\u0438\u043d\u0430\u0454\u0442\u044c\u0441\u044f \u0437 \u0432\u0435\u043b\u0438\u043a\u043e\u0457 \u043b\u0456\u0442\u0435\u0440\u0438
+is=\u0406\u0441\u043b\u0430\u043d\u0434\u0441\u044c\u043a\u0430
+it=\u0406\u0442\u0430\u043b\u0456\u0439\u0441\u044c\u043a\u0430
+lt=\u041b\u0438\u0442\u043e\u0432\u0441\u044c\u043a\u0430
+missing_space_after_comma=\u0412\u0441\u0442\u0430\u0432\u0442\u0435 \u043f\u0440\u043e\u0433\u0430\u043b\u0438\u043d\u0443 \u043f\u0456\u0441\u043b\u044f \u043a\u043e\u043c\u0438
+ml=Malayalam
+nl=\u0413\u043e\u043b\u0430\u043d\u0434\u0441\u044c\u043a\u0430
+no_space_after=\u041d\u0435 \u0441\u0442\u0430\u0432\u0442\u0435 \u043f\u0440\u043e\u0433\u0430\u043b\u0438\u043d\u0443 \u043f\u0456\u0441\u043b\u044f \u043b\u0456\u0432\u043e\u0457 \u0434\u0443\u0436\u043a\u0438
+no_space_before=\u041d\u0435 \u0441\u0442\u0430\u0432\u0442\u0435 \u043f\u0440\u043e\u0433\u0430\u043b\u0438\u043d\u0443 \u043f\u0435\u0440\u0435\u0434 \u043f\u0440\u0430\u0432\u043e\u044e \u0434\u0443\u0436\u043a\u043e\u044e
+no_space_before_dot=\u041d\u0435 \u0441\u0442\u0430\u0432\u0442\u0435 \u043f\u0440\u043e\u0433\u0430\u043b\u0438\u043d\u0443 \u043f\u0435\u0440\u0435\u0434 \u043a\u0440\u0430\u043f\u043a\u043e\u044e
+pl=\u041f\u043e\u043b\u044c\u0441\u044c\u043a\u0430
+repetition=\u041c\u043e\u0436\u043b\u0438\u0432\u0430 \u043c\u0435\u0445\u0430\u043d\u0456\u0447\u043d\u0430 \u043f\u043e\u043c\u0438\u043b\u043a\u0430\: \u043f\u043e\u0432\u0442\u043e\u0440\u0435\u043d\u043d\u044f \u0441\u043b\u043e\u0432\u0430
+result1=<br><b> {0}. \u0420\u044f\u0434\u043e\u043a {1}, \u0441\u0442\u043e\u0432\u043f\u0447\u0438\u043a {2}</b><br>
+resultAreaText=\u0422\u0443\u0442 \u0437'\u044f\u0432\u043b\u044f\u0442\u044c\u0441\u044f \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u0438
+resultTime=<br>\u0417\u0430\u0442\u0440\u0430\u0447\u0435\u043d\u043e\: {0}\u043c\u0441 (\u0432\u043a\u043b\u044e\u0447\u043d\u043e \u0437 {1}\u043c\u0441 \u043d\u0430 \u043f\u0435\u0440\u0435\u0432\u0456\u0440\u044f\u043d\u043d\u044f \u043f\u0440\u0430\u0432\u0438\u043b)<br>
+ru=\u0420\u043e\u0441\u0456\u0439\u0441\u044c\u043a\u0430
+sk=\u0421\u043b\u043e\u0432\u0430\u0446\u044c\u043a\u0430
+sl=\u0421\u043b\u043e\u0432\u0435\u043d\u0441\u044c\u043a\u0430
+space_after_comma=\u041f\u043e\u0441\u0442\u0430\u0432\u0442\u0435 \u043f\u0440\u043e\u0433\u0430\u043b\u0438\u043d\u0443 \u043f\u0456\u0441\u043b\u044f \u043a\u043e\u043c\u0438, \u0430 \u043d\u0435 \u043f\u0435\u0440\u0435\u0434 \u043a\u043e\u043c\u043e\u044e
+startChecking=\u041f\u043e\u0447\u0430\u0442\u043e\u043a \u043f\u0435\u0440\u0435\u0432\u0456\u0440\u044f\u043d\u043d\u044f \u0432 {0}
+sv=\u0428\u0432\u0435\u0434\u0441\u044c\u043a\u0430
+textLanguage=\u041c\u043e\u0432\u0430 \u0442\u0435\u043a\u0441\u0442\u0443\:
+two_commas=\u041f\u043e\u0434\u0432\u0456\u0439\u043d\u0430 \u043a\u043e\u043c\u0430
+two_dots=\u041f\u043e\u0434\u0432\u0456\u0439\u043d\u0430 \u043a\u0440\u0430\u043f\u043a\u0430
+uk=\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430
+unpaired_brackets=\u041d\u0435\u043f\u0430\u0440\u043d\u0456 \u0434\u0443\u0436\u043a\u0438 \u0430\u0431\u043e \u0456\u043d\u0448\u0456 \u0432\u0438\u043e\u043a\u0440\u0435\u043c\u043b\u044e\u0432\u0430\u043b\u044c\u043d\u0456 \u0441\u0438\u043c\u0432\u043e\u043b\u0438
+whitespace_repetition=\u041f\u043e\u0432\u0442\u043e\u0440 \u043f\u0440\u043e\u0431\u0456\u043b\u0443
+ro=Rom\u00e2n\u0103
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/TextFilter.java b/JLanguageTool/src/java/de/danielnaber/languagetool/TextFilter.java
new file mode 100644
index 0000000..a1eaad6
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/TextFilter.java
@@ -0,0 +1,30 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+/**
+ * Filter, i.e. clean up, text before it is checked.
+ *
+ * @author Daniel Naber
+ */
+public interface TextFilter {
+
+  /**
+   * Applies this filter to the given text.
+   *
+   * @param text the text to filter
+   * @return the filtered text
+   */
+  String filter(String text);
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/BitextReader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/BitextReader.java
new file mode 100644
index 0000000..0770dcd
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/BitextReader.java
@@ -0,0 +1,62 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.bitext;
+
+/**
+ * Interface for classes that implement reading from bitext files,
+ * such as translation memory files, glossary files, aligned text...
+ *
+ * @author Marcin Miłkowski
+ */
+
+public interface BitextReader extends Iterable<StringPair> {
+
+  /**
+   * Reports the line number the reader is currently at in the file.
+   *
+   * @return the current line number
+   */
+  int getLineCount();
+
+  /**
+   * Reports the column number the reader is currently at in the file.
+   *
+   * @return the current column number
+   */
+  int getColumnCount();
+
+  /**
+   * Reports the target column number the reader is currently at in the file.
+   *
+   * @return the current target column number
+   */
+  int getTargetColumnCount();
+
+  /**
+   * Reports the position of the current target sentence in the file.
+   *
+   * @return the current sentence position
+   */
+  int getSentencePosition();
+
+  /**
+   * Returns the line of the bitext input the reader is currently at.
+   *
+   * @return the complete line (including source, if any)
+   */
+  String getCurrentLine();
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/StringPair.java b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/StringPair.java
new file mode 100644
index 0000000..7677d1d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/StringPair.java
@@ -0,0 +1,49 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.bitext;
+
+/**
+ * A convenience class to work with bitext strings.
+ * @author Marcin Miłkowski
+ *
+ */
+public class StringPair {
+
+  private final String sourceString;
+  private final String targetString;
+
+  /**
+   * Creates a pair from a source string and a target string.
+   *
+   * @param source the source-side string
+   * @param target the target-side string
+   */
+  public StringPair(final String source, final String target) {
+    sourceString = source;
+    targetString = target;
+  }
+
+  /**
+   * @return the source string given to the constructor
+   */
+  public String getSource() {
+    return sourceString;
+  }
+
+  /**
+   * @return the target string given to the constructor
+   */
+  public String getTarget() {
+    return targetString;
+  }
+
+  /**
+   * Renders the pair as the source string, then {@code " & "}, then the
+   * target string.
+   *
+   * @return the textual form of this pair
+   */
+  @Override
+  public String toString() {
+    return sourceString + " & " + targetString;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java
new file mode 100644
index 0000000..b0a4eaa
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/TabBitextReader.java
@@ -0,0 +1,129 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.bitext;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Iterator;
+
+/**
+ * Reader of simple tab-delimited bilingual files.
+ *
+ * @author Marcin Miłkowski
+ */
+public class TabBitextReader implements BitextReader {
+
+  /** Reader on the bitext file; closed as soon as the last line has been read. */
+  protected BufferedReader in;
+  /** Pair parsed from {@link #nextLine}; {@code null} once the file is exhausted. */
+  protected StringPair nextPair;
+  /** Look-ahead line that the next {@code next()} call consumes; {@code null} at end of file. */
+  protected String nextLine;
+  /** Raw line of the pair most recently returned by the iterator; empty before the first one. */
+  private String prevLine;
+
+  /** Incremented on every returned pair; stays {@code -1} until the first {@code next()}. */
+  private int lineCount = -1;
+  /** Length of the most recently returned source string plus one. */
+  protected int sentencePos;
+
+  /**
+   * Opens the file and pre-reads its first line.
+   *
+   * @param filename path of the tab-delimited file
+   * @param encoding charset name, or {@code null} to use the platform default
+   * @throws IllegalArgumentException if the file cannot be opened or read, or
+   *         if the encoding is not supported (the {@link IOException} is the cause)
+   */
+  public TabBitextReader(final String filename, final String encoding) {
+    FileInputStream stream = null;
+    try {
+      stream = new FileInputStream(filename);
+      in = new BufferedReader(encoding == null
+          ? new InputStreamReader(stream)
+          : new InputStreamReader(stream, encoding));
+      nextLine = in.readLine();
+      prevLine = "";
+      // Overridable call: subclasses select different columns.
+      nextPair = tab2StringPair(nextLine);
+      if (nextLine == null) {
+        // Empty file: no next() call will ever reach the end-of-file close.
+        in.close();
+      }
+    } catch (IOException e) {
+      // Do not leak the file handle when wrapping or the first read fails.
+      closeQuietly(stream);
+      throw new IllegalArgumentException(e);
+    }
+  }
+
+  /** Closes the stream, ignoring failures so the primary exception stays visible. */
+  private static void closeQuietly(final FileInputStream stream) {
+    if (stream == null) {
+      return;
+    }
+    try {
+      stream.close();
+    } catch (IOException ignored) {
+      // secondary failure during clean-up; the caller reports the primary one
+    }
+  }
+
+  /**
+   * Splits a line into source (first column) and target (second column).
+   * A line without a second column yields an empty target instead of an
+   * {@link ArrayIndexOutOfBoundsException}.
+   *
+   * @param line the raw line, or {@code null} at end of file
+   * @return the parsed pair, or {@code null} if {@code line} is {@code null}
+   */
+  protected StringPair tab2StringPair(final String line) {
+    if (line == null) {
+      return null;
+    }
+    // Limit -1 keeps trailing empty columns, so "source\t" still has a target.
+    final String[] fields = line.split("\t", -1);
+    return new StringPair(fields[0], fields.length > 1 ? fields[1] : "");
+  }
+
+  /**
+   * @return an iterator over the remaining pairs; all iterators share this
+   *         reader's look-ahead state, so the file is traversed only once
+   */
+  @Override
+  public Iterator<StringPair> iterator() {
+    return new TabReader();
+  }
+
+  class TabReader implements Iterator<StringPair> {
+
+    @Override
+    public boolean hasNext() {
+      return nextLine != null;
+    }
+
+    /**
+     * Returns the look-ahead pair, reads the following line and updates the
+     * sentence position, current line and line count of the enclosing reader.
+     *
+     * @throws java.util.NoSuchElementException if the file is exhausted
+     * @throws IllegalArgumentException if reading the next line fails
+     */
+    @Override
+    public StringPair next() {
+      // nextPair is null exactly when nextLine is null (see tab2StringPair).
+      if (nextPair == null) {
+        throw new java.util.NoSuchElementException("No more lines in bitext file");
+      }
+      try {
+        final StringPair result = nextPair;
+        sentencePos = result.getSource().length() + 1;
+        prevLine = nextLine;
+        nextLine = in.readLine();
+        nextPair = tab2StringPair(nextLine);
+        lineCount++;
+        if (nextLine == null) {
+          in.close();
+        }
+        return result;
+      } catch (IOException e) {
+        throw new IllegalArgumentException(e);
+      }
+    }
+
+    // The file is read-only.
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /**
+   * @return the current sentence position, i.e. the same value as
+   *         {@link #getSentencePosition()}
+   */
+  @Override
+  public int getColumnCount() {
+    return sentencePos;
+  }
+
+  /**
+   * @return always {@code 1}
+   */
+  @Override
+  public int getTargetColumnCount() {
+    return 1;
+  }
+
+  /**
+   * @return the number of pairs returned so far minus one ({@code -1} before
+   *         the first pair has been returned)
+   */
+  @Override
+  public int getLineCount() {
+    return lineCount;
+  }
+
+  /**
+   * @return the length of the most recently returned source string plus one
+   */
+  @Override
+  public int getSentencePosition() {
+    return sentencePos;
+  }
+
+  /**
+   * @return the raw line of the most recently returned pair, or an empty
+   *         string if no pair has been returned yet
+   */
+  @Override
+  public String getCurrentLine() {
+    return prevLine;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/WordFastTMReader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/WordFastTMReader.java
new file mode 100644
index 0000000..cadad69
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/bitext/WordFastTMReader.java
@@ -0,0 +1,87 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.bitext;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * Reader of WordFast Translation Memory text files.
+ * They are simple tab-delimited text files.
+ *
+ * @author Marcin Miłkowski
+ */
+public class WordFastTMReader extends TabBitextReader {
+
+  /** Zero-based index of the column read as the source string. */
+  private static final int SOURCE_COLUMN = 4;
+  /** Zero-based index of the column read as the target string. */
+  private static final int TARGET_COLUMN = 6;
+
+  /**
+   * Opens the file and skips its first line, which is treated as a header.
+   *
+   * @param filename path of the translation memory file
+   * @param encoding charset name, or {@code null} to use the platform default
+   * @throws IOException if reading the line after the header fails
+   * @throws IllegalArgumentException if the superclass constructor cannot
+   *         open or read the file
+   */
+  public WordFastTMReader(final String filename, final String encoding) throws IOException {
+    super(filename, encoding);
+    boolean exhausted = true;
+    try {
+      //skip the header (first line)
+      if (nextLine != null) {
+        nextLine = in.readLine();
+        nextPair = tab2StringPair(nextLine);
+      }
+      exhausted = nextLine == null;
+    } finally {
+      if (exhausted) {
+        // Empty or header-only file, or the read failed: no iterator will
+        // reach the end-of-file close, so release the file here. Closing an
+        // already closed reader has no effect.
+        in.close();
+      }
+    }
+  }
+
+  /**
+   * Extracts the source and target columns of a translation memory line.
+   * Columns that are missing from the line are returned as empty strings.
+   * <p>
+   * This method deliberately has no side effects: the sentence position,
+   * current line and line count are maintained by the iterator inherited from
+   * {@link TabBitextReader} at the moment a pair is returned, so that they
+   * describe the returned pair rather than the look-ahead line.
+   *
+   * @param line the raw line, or {@code null} at end of file
+   * @return the parsed pair, or {@code null} if {@code line} is {@code null}
+   */
+  @Override
+  public final StringPair tab2StringPair(final String line) {
+    if (line == null) {
+      return null;
+    }
+    // Limit -1 keeps trailing empty columns, which a plain split() would drop.
+    final String[] fields = line.split("\t", -1);
+    return new StringPair(column(fields, SOURCE_COLUMN), column(fields, TARGET_COLUMN));
+  }
+
+  /** Returns the requested column, or an empty string if the line is too short. */
+  private static String column(final String[] fields, final int index) {
+    return index < fields.length ? fields[index] : "";
+  }
+
+  /**
+   * @return the iterator of the superclass, which parses lines through
+   *         {@link #tab2StringPair(String)} and keeps the reader's position
+   *         bookkeeping up to date
+   */
+  @Override
+  public Iterator<StringPair> iterator() {
+    return super.iterator();
+  }
+
+}
+
+
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/databroker/DefaultResourceDataBroker.java b/JLanguageTool/src/java/de/danielnaber/languagetool/databroker/DefaultResourceDataBroker.java
new file mode 100644
index 0000000..d365ea5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/databroker/DefaultResourceDataBroker.java
@@ -0,0 +1,360 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.databroker;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.security.Permission;
+
+import de.danielnaber.languagetool.JLanguageTool;
+
+/**
+ * Responsible for getting any items from the grammar checker's resource
+ * directories. This default data broker assumes that they are accessible
+ * directly via class-path and the directory names are like specified in:
+ *
+ * <ul style="list-style-type: circle">
+ * <li>{@link ResourceDataBroker#RESOURCE_DIR}</li>
+ * <li>{@link ResourceDataBroker#RULES_DIR}</li>
+ * </ul>
+ * <p>
+ *
+ * If you'd like to determine another resource directory location this default
+ * data broker provides proper methods.
+ * Assuming your {@code /rules} and {@code /resource} directories are accessible
+ * via class-path with following path information:
+ *
+ * <ul style="list-style-type: circle">
+ * <li>{@code /res/grammarchecker/rulesdirname}</li>
+ * <li>{@code /res/grammarchecker/resourcedirname}</li>
+ * </ul>
+ *
+ * In this case you have to invoke the methods
+ * {@link ResourceDataBroker#setRulesDir(String)} and
+ * {@link ResourceDataBroker#setResourceDir(String)} with following arguments:
+ *
+ * <ul style="list-style-type: circle">
+ * <li>{@code /res/grammarchecker/rulesdirname}</li>
+ * <li>{@code /res/grammarchecker/resourcedirname}</li>
+ * </ul>
+ * <p>
+ *
+ * Make sure that you never obtain any grammar checker resources by calling
+ * {@code Object.class.getResource(String)} or {@code
+ * Object.class.getResourceAsStream(String)} directly. If you would like to
+ * obtain something from these directories do always use
+ * {@link JLanguageTool#getDataBroker()} which provides proper methods for
+ * reading the directories above.
+ * <p>
+ *
+ * For example, if you want to get the {@link URL} of {@code
+ * /rules/de/grammar.xml} just invoke
+ * {@link ResourceDataBroker#getFromRulesDirAsUrl(String)} and pass {@code
+ * /de/grammar.xml} as a string. Note: The {@code /rules} directory's name isn't
+ * passed, because its name might have changed. The same usage does apply for the
+ * {@code /resource} directory.
+ *
+ * @see ResourceDataBroker
+ * @author PAX
+ * @since 1.0.1
+ */
+public class DefaultResourceDataBroker implements ResourceDataBroker {
+
+  /**
+   * The directory's name of the grammar checker's resource directory. The
+   * default value equals {@link ResourceDataBroker#RESOURCE_DIR}.
+   */
+  protected String resourceDir;
+
+  /**
+   * The directory's name of the grammar checker's rules directory. The
+   * default value equals {@link ResourceDataBroker#RULES_DIR}.
+   */
+  protected String rulesDir;
+
+  /**
+   * Instantiates this data broker with the default resource directory names
+   * as specified in:
+   *
+   * <ul>
+   * <li>{@link ResourceDataBroker#RESOURCE_DIR}</li>
+   * <li>{@link ResourceDataBroker#RULES_DIR}</li>
+   * </ul>
+   */
+  public DefaultResourceDataBroker() {
+    this(ResourceDataBroker.RESOURCE_DIR, ResourceDataBroker.RULES_DIR);
+  }
+
+  /**
+   * Instantiates this data broker with the passed resource directory names.
+   *
+   * @param resourceDir
+   *            The directory's name of the grammar checker's resource
+   *            directory; {@code null} is stored as an empty string.
+   * @param rulesDir
+   *            The directory's name of the grammar checker's rules directory;
+   *            {@code null} is stored as an empty string.
+   */
+  public DefaultResourceDataBroker(final String resourceDir, final String rulesDir) {
+    this.setResourceDir(resourceDir);
+    this.setRulesDir(rulesDir);
+  }
+
+  /**
+   * See:
+   * {@link ResourceDataBroker#getFromResourceDirAsStream(java.lang.String)}
+   *
+   * @param path
+   *            The relative path to the item inside of the {@code /resource}
+   *            directory. A leading {@code /} is optional; exactly one
+   *            separator is placed between the directory and the path.
+   * @return An {@link InputStream} object to the requested item or {@code
+   *         null} if it wasn't found.
+   */
+  @Override
+  public InputStream getFromResourceDirAsStream(final String path) {
+    final String completePath = this.getCompleteResourceUrl(path);
+    return ResourceDataBroker.class.getResourceAsStream(completePath);
+  }
+
+  /**
+   * See:
+   * {@link ResourceDataBroker#getFromResourceDirAsUrl(java.lang.String)}
+   *
+   * @param path
+   *            The relative path to the item inside of the {@code /resource}
+   *            directory. A leading {@code /} is optional; exactly one
+   *            separator is placed between the directory and the path.
+   * @return An {@link URL} object to the requested item or {@code null} if it
+   *         wasn't found.
+   */
+  @Override
+  public URL getFromResourceDirAsUrl(final String path) {
+    final String completePath = this.getCompleteResourceUrl(path);
+    return getFixedJarURL(ResourceDataBroker.class.getResource(completePath));
+  }
+
+  /**
+   * Concatenates the passed resource path with the currently set {@code
+   * resource} directory path.
+   *
+   * @param path
+   *            The relative path to a resource item inside of the {@code
+   *            resource} directory.
+   * @return The full relative path to the resource including the path to the
+   *         {@code resource} directory.
+   */
+  private String getCompleteResourceUrl(final String path) {
+    return joinPath(this.getResourceDir(), path);
+  }
+
+  /**
+   * See:
+   * {@link ResourceDataBroker#getFromRulesDirAsStream(java.lang.String)}
+   *
+   * @param path
+   *            The relative path to the item inside of the {@code /rules}
+   *            directory. A leading {@code /} is optional; exactly one
+   *            separator is placed between the directory and the path.
+   * @return An {@link InputStream} object to the requested item or {@code
+   *         null} if it wasn't found.
+   */
+  @Override
+  public InputStream getFromRulesDirAsStream(final String path) {
+    final String completePath = this.getCompleteRulesUrl(path);
+    return ResourceDataBroker.class.getResourceAsStream(completePath);
+  }
+
+  /**
+   * See: {@link ResourceDataBroker#getFromRulesDirAsUrl(java.lang.String)}
+   *
+   * @param path
+   *            The relative path to the item inside of the {@code /rules}
+   *            directory. A leading {@code /} is optional; exactly one
+   *            separator is placed between the directory and the path.
+   * @return An {@link URL} object to the requested item or {@code null} if it
+   *         wasn't found.
+   */
+  @Override
+  public URL getFromRulesDirAsUrl(final String path) {
+    final String completePath = this.getCompleteRulesUrl(path);
+    return getFixedJarURL(ResourceDataBroker.class.getResource(completePath));
+  }
+
+  /**
+   * Concatenates the passed resource path with the currently set {@code
+   * rules} directory path.
+   *
+   * @param path
+   *            The relative path to a resource item inside of the {@code
+   *            rules} directory.
+   * @return The full relative path to the resource including the path to the
+   *         {@code rules} directory.
+   */
+  private String getCompleteRulesUrl(final String path) {
+    return joinPath(this.getRulesDir(), path);
+  }
+
+  /**
+   * Joins a directory and a path so that exactly one {@code '/'} separates
+   * them: a missing separator is inserted and a doubled one is collapsed.
+   * An empty path is accepted and yields the directory followed by a
+   * separator.
+   *
+   * @param dir
+   *            The directory part; must not be {@code null}.
+   * @param path
+   *            The path below {@code dir}; must not be {@code null}.
+   * @return The joined path.
+   * @throws NullPointerException
+   *             if {@code path} or {@code dir} is {@code null}
+   */
+  private static String joinPath(final String dir, final String path) {
+    final boolean dirEndsWithSlash = dir.endsWith("/");
+    // startsWith() instead of charAt(0): an empty path must not throw.
+    final boolean pathStartsWithSlash = path.startsWith("/");
+
+    final StringBuilder completePath = new StringBuilder(dir);
+    if (dirEndsWithSlash && pathStartsWithSlash) {
+      // both sides bring a separator: keep only the directory's one
+      completePath.append(path.substring(1));
+    } else {
+      if (!dirEndsWithSlash && !pathStartsWithSlash) {
+        completePath.append('/');
+      }
+      completePath.append(path);
+    }
+    return completePath.toString();
+  }
+
+  /**
+   * @return The directory's name of the grammar checker's resource directory.
+   *         The default value equals
+   *         {@link ResourceDataBroker#RESOURCE_DIR}.
+   */
+  @Override
+  public String getResourceDir() {
+    return this.resourceDir;
+  }
+
+  /**
+   * @param resourceDir
+   *            The directory's name of the grammar checker's resource
+   *            directory. The default value was
+   *            {@link ResourceDataBroker#RESOURCE_DIR}. Please let this
+   *            string start with {@code '/'} and use this character as path
+   *            separator. Don't set this character to the string's end. Valid
+   *            example value: {@code /subdir/furtherdir/resourcedir}.
+   *            {@code null} is stored as an empty string.
+   */
+  @Override
+  public void setResourceDir(final String resourceDir) {
+    this.resourceDir = (resourceDir == null) ? "" : resourceDir;
+  }
+
+  /**
+   * @return The directory's name of the grammar checker's rules directory.
+   *         The default value equals {@link ResourceDataBroker#RULES_DIR}.
+   */
+  @Override
+  public String getRulesDir() {
+    return this.rulesDir;
+  }
+
+  /**
+   * @param rulesDir
+   *            The directory's name of the grammar checker's rules directory.
+   *            The default value was {@link ResourceDataBroker#RULES_DIR}.
+   *            Please let this string start with {@code '/'} and use this
+   *            character as path separator. Don't set this character to the
+   *            string's end. Valid example value: {@code
+   *            /subdir/furtherdir/rulesdir}.
+   *            {@code null} is stored as an empty string.
+   */
+  @Override
+  public void setRulesDir(final String rulesDir) {
+    this.rulesDir = (rulesDir == null) ? "" : rulesDir;
+  }
+
+  /**
+   * Fixes the getResource bug if you want to obtain any resource from a JAR file under Java
+   * 1.5.0_16 Webstart. (Workaround by {@code mevanclark} from http://forums.sun.com)
+   * <p>
+   * Only {@code jar:} URLs that lack the {@code "!/"} separator are rewritten.
+   * The name of the connection's permission is treated as the path of the jar
+   * file; whenever any step of the lookup fails, the URL is returned unchanged.
+   *
+   * @param url The {@link URL} to be fixed.
+   * @return The fixed version if necessary.
+   */
+  private static URL getFixedJarURL(final URL url) {
+    if (url == null || !"jar".equalsIgnoreCase(url.getProtocol())) {
+      return url;
+    }
+    if (url.toString().indexOf("!/") > -1) {
+      // already a well-formed jar URL
+      return url;
+    }
+
+    final String permissionName;
+    try {
+      final URLConnection urlConnection = url.openConnection();
+      if (urlConnection == null) {
+        return url;
+      }
+      final Permission permission = urlConnection.getPermission();
+      if (permission == null) {
+        return url;
+      }
+      permissionName = permission.getName();
+    } catch (IOException e) {
+      return url;
+    }
+    if (permissionName == null) {
+      return url;
+    }
+
+    final File file = new File(permissionName);
+    if (!file.exists()) {
+      return url;
+    }
+
+    try {
+      return new URL("jar:" + file.toURI().toURL().toExternalForm() + "!/" + url.getPath());
+    } catch (MalformedURLException e) {
+      return url;
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/databroker/ResourceDataBroker.java b/JLanguageTool/src/java/de/danielnaber/languagetool/databroker/ResourceDataBroker.java
new file mode 100644
index 0000000..eac263b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/databroker/ResourceDataBroker.java
@@ -0,0 +1,139 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.databroker;
+
+import java.io.InputStream;
+import java.net.URL;
+
+import de.danielnaber.languagetool.JLanguageTool;
+
+/**
+ * Is responsible for getting the necessary resources for the grammar checker
+ * library. Following directories are currently needed by a couple of classes:
+ *
+ * <ul style="list-style-type: circle">
+ * <li>{@code /resource}</li>
+ * <li>{@code /rules}</li>
+ * </ul>
+ *
+ * This interface determines methods to obtain any contents from these
+ * directories.
+ * <p>
+ *
+ * Make sure that you never obtain any grammar checker resources by calling
+ * {@code Object.class.getResource(String)} or {@code
+ * Object.class.getResourceAsStream(String)} directly. If you would like to
+ * obtain something from these directories do always use
+ * {@link JLanguageTool#getDataBroker()} which provides proper methods for
+ * reading the directories above.
+ * <p>
+ *
+ * For example, if you want to get the {@link URL} of {@code
+ * /rules/de/grammar.xml} just invoke
+ * {@link ResourceDataBroker#getFromRulesDirAsUrl(String)} and pass {@code
+ * /de/grammar.xml} as a string. Note: The {@code /rules} directory's name isn't
+ * passed, because its name might have changed. The same usage does apply for the
+ * {@code /resource} directory.
+ *
+ * @author PAX
+ * @since 1.0.1
+ */
+public interface ResourceDataBroker {
+
+  /**
+   * Default name of the {@code /resource} directory.
+   */
+  String RESOURCE_DIR = "/resource";
+
+  /**
+   * Default name of the {@code /rules} directory.
+   */
+  String RULES_DIR = "/rules";
+
+  /**
+   * Looks up an item below the grammar checker's {@code /resource} directory.
+   *
+   * @param path location of the item, relative to the {@code /resource}
+   *          directory
+   * @return the item's {@link URL}, or {@code null} if there is no such item
+   */
+  URL getFromResourceDirAsUrl(String path);
+
+  /**
+   * Opens an item below the grammar checker's {@code /resource} directory.
+   *
+   * @param path location of the item, relative to the {@code /resource}
+   *          directory
+   * @return an {@link InputStream} on the item, or {@code null} if there is
+   *         no such item
+   */
+  InputStream getFromResourceDirAsStream(String path);
+
+  /**
+   * Looks up an item below the grammar checker's {@code /rules} directory.
+   *
+   * @param path location of the item, relative to the {@code /rules}
+   *          directory
+   * @return the item's {@link URL}, or {@code null} if there is no such item
+   */
+  URL getFromRulesDirAsUrl(String path);
+
+  /**
+   * Opens an item below the grammar checker's {@code /rules} directory.
+   *
+   * @param path location of the item, relative to the {@code /rules}
+   *          directory
+   * @return an {@link InputStream} on the item, or {@code null} if there is
+   *         no such item
+   */
+  InputStream getFromRulesDirAsStream(String path);
+
+  /**
+   * @return the path currently used as resource directory, expected to be of
+   *         the form
+   *         <p>
+   *         {@code /subdir/furtherdir/resourcedir}
+   */
+  String getResourceDir();
+
+  /**
+   * @param resourceDir the path to use as {@code /resource} directory; it
+   *          should have the form
+   *          <p>
+   *          {@code /subdir/furtherdir/resourcedir}
+   */
+  void setResourceDir(String resourceDir);
+
+  /**
+   * @return the path currently used as rules directory, expected to be of
+   *         the form
+   *         <p>
+   *         {@code /subdir/furtherdir/rulesdir}
+   */
+  String getRulesDir();
+
+  /**
+   * @param rulesDir the path to use as {@code /rules} directory; it should
+   *          have the form
+   *          <p>
+   *          {@code /subdir/furtherdir/rulesdir}
+   */
+  void setRulesDir(String rulesDir);
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/AboutDialog.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/AboutDialog.java
new file mode 100644
index 0000000..26df4a1
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/AboutDialog.java
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+import java.util.ResourceBundle;
+
+import javax.swing.JOptionPane;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A dialog with version and copyright information.
+ *
+ * @author Daniel Naber
+ */
+public class AboutDialog {
+
+  protected final ResourceBundle messages;
+
+  /**
+   * @param messages bundle that supplies the {@code guiMenuAbout} label and
+   *          is passed on when the maintainer list is built
+   */
+  public AboutDialog(final ResourceBundle messages) {
+    this.messages = messages;
+  }
+
+  /**
+   * Displays the about text in an information message dialog without a
+   * parent component. The dialog title is derived from the
+   * {@code guiMenuAbout} message.
+   */
+  public void show() {
+    final String menuEntry = messages.getString("guiMenuAbout");
+    final String dialogTitle = StringTools.getLabel(menuEntry);
+    final String body = getAboutText();
+    JOptionPane.showMessageDialog(null, body, dialogTitle,
+        JOptionPane.INFORMATION_MESSAGE);
+  }
+
+  /**
+   * @return version, copyright, license and homepage lines followed by the
+   *         maintainers of the language modules
+   */
+  protected String getAboutText() {
+    final StringBuilder about = new StringBuilder();
+    about.append("LanguageTool ").append(JLanguageTool.VERSION).append("\n");
+    about.append("Copyright (C) 2005-2010 Daniel Naber\n");
+    about.append("This software is licensed under the GNU Lesser General Public License.\n");
+    about.append("LanguageTool Homepage: http://www.languagetool.org\n\n");
+    about.append("Maintainers of the language modules:\n\n");
+    about.append(Language.getAllMaintainers(messages));
+    return about.toString();
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Configuration.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Configuration.java
new file mode 100644
index 0000000..932e1fe
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Configuration.java
@@ -0,0 +1,233 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.*;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.server.HTTPServer;
+
+/**
+ * Configuration -- the disabled and enabled rule IDs, the disabled category
+ * names, the mother tongue and the server settings.
+ * Configuration is loaded from and stored to a properties file.
+ *
+ * @author Daniel Naber
+ */
+public class Configuration {
+
+  private static final String CONFIG_FILE = "languagetool.properties";
+  private static final String DISABLED_RULES_CONFIG_KEY = "disabledRules";
+  private static final String ENABLED_RULES_CONFIG_KEY = "enabledRules";
+  private static final String DISABLED_CATEGORIES_CONFIG_KEY = "disabledCategories";
+  private static final String MOTHER_TONGUE_CONFIG_KEY = "motherTongue";
+  private static final String SERVER_RUN_CONFIG_KEY = "serverMode";
+  private static final String SERVER_PORT_CONFIG_KEY = "serverPort";
+
+  private final File configFile;
+
+  private Set<String> disabledRuleIds = new HashSet<String>();
+  private Set<String> enabledRuleIds = new HashSet<String>();
+  private Set<String> disabledCategoryNames = new HashSet<String>();
+  private Language motherTongue;
+  private boolean runServer;
+  private int serverPort = HTTPServer.DEFAULT_PORT;
+
+  /**
+   * Loads the configuration from the given file; a missing file leaves all
+   * settings at their defaults.
+   *
+   * @param baseDir existing directory that contains the configuration file
+   * @param filename name of the properties file inside {@code baseDir}
+   * @throws IllegalArgumentException if {@code baseDir} is not a directory
+   * @throws IOException if the file exists but cannot be read, or if it
+   *         contains a server port that is not a number
+   */
+  public Configuration(final File baseDir, final String filename)
+      throws IOException {
+    if (!baseDir.isDirectory()) {
+      throw new IllegalArgumentException("Not a directory: " + baseDir);
+    }
+    configFile = new File(baseDir, filename);
+    loadConfiguration();
+  }
+
+  /**
+   * Loads the configuration from {@code languagetool.properties} inside the
+   * given directory.
+   *
+   * @see #Configuration(File, String)
+   */
+  public Configuration(final File baseDir) throws IOException {
+    this(baseDir, CONFIG_FILE);
+  }
+
+  /** @return the live set of disabled rule IDs (not a copy) */
+  public Set<String> getDisabledRuleIds() {
+    return disabledRuleIds;
+  }
+
+  /** @return the live set of enabled rule IDs (not a copy) */
+  public Set<String> getEnabledRuleIds() {
+    return enabledRuleIds;
+  }
+
+  /** @return the live set of disabled category names (not a copy) */
+  public Set<String> getDisabledCategoryNames() {
+    return disabledCategoryNames;
+  }
+
+  /** @param ruleIDs the new set; {@code null} is saved as an empty list */
+  public void setDisabledRuleIds(final Set<String> ruleIDs) {
+    disabledRuleIds = ruleIDs;
+  }
+
+  /** @param ruleIDs the new set; {@code null} is saved as an empty list */
+  public void setEnabledRuleIds(final Set<String> ruleIDs) {
+    enabledRuleIds = ruleIDs;
+  }
+
+  /** @param categoryNames the new set; {@code null} is saved as an empty list */
+  public void setDisabledCategoryNames(final Set<String> categoryNames) {
+    disabledCategoryNames = categoryNames;
+  }
+
+  /** @return the mother tongue, or {@code null} if none was loaded or set */
+  public Language getMotherTongue() {
+    return motherTongue;
+  }
+
+  public void setMotherTongue(final Language motherTongue) {
+    this.motherTongue = motherTongue;
+  }
+
+  public boolean getRunServer() {
+    return runServer;
+  }
+
+  public void setRunServer(final boolean runServer) {
+    this.runServer = runServer;
+  }
+
+  /** @return the server port; {@code HTTPServer.DEFAULT_PORT} unless loaded or set */
+  public int getServerPort() {
+    return serverPort;
+  }
+
+  public void setServerPort(final int serverPort) {
+    this.serverPort = serverPort;
+  }
+
+  /**
+   * Reads the properties file and merges its values into the current
+   * settings. Keys that are absent leave the corresponding setting untouched.
+   *
+   * @throws IOException if the file exists but cannot be read, or if the
+   *         stored server port is not a number
+   */
+  private void loadConfiguration() throws IOException {
+
+    // FIXME: disabling a rule X in language Y should not disable it in all
+    // languages - need to add a language parameter
+
+    final Properties props = new Properties();
+    FileInputStream fis = null;
+    try {
+      fis = new FileInputStream(configFile);
+      props.load(fis);
+    } catch (final FileNotFoundException e) {
+      // no configuration has been saved yet: okay, keep the defaults
+      return;
+    } finally {
+      if (fis != null) {
+        fis.close();
+      }
+    }
+
+    addCommaSeparated(props.getProperty(DISABLED_RULES_CONFIG_KEY), disabledRuleIds);
+    addCommaSeparated(props.getProperty(ENABLED_RULES_CONFIG_KEY), enabledRuleIds);
+    addCommaSeparated(props.getProperty(DISABLED_CATEGORIES_CONFIG_KEY), disabledCategoryNames);
+
+    final String motherTongueStr = props.getProperty(MOTHER_TONGUE_CONFIG_KEY);
+    if (motherTongueStr != null) {
+      motherTongue = Language.getLanguageForShortName(motherTongueStr);
+    }
+    final String runServerString = props.getProperty(SERVER_RUN_CONFIG_KEY);
+    if (runServerString != null) {
+      runServer = runServerString.equals("true");
+    }
+    final String serverPortString = props.getProperty(SERVER_PORT_CONFIG_KEY);
+    if (serverPortString != null) {
+      try {
+        serverPort = Integer.parseInt(serverPortString.trim());
+      } catch (final NumberFormatException e) {
+        // Report a damaged file through the declared exception type, with
+        // enough context to find and repair the entry.
+        final IOException ioe = new IOException("Invalid value for '"
+            + SERVER_PORT_CONFIG_KEY + "' in " + configFile + ": " + serverPortString);
+        ioe.initCause(e);
+        throw ioe;
+      }
+    }
+  }
+
+  /**
+   * Adds the non-empty items of a comma-separated list to the target set.
+   * Empty items are skipped because an empty set is saved as an empty string,
+   * which must not come back as an empty ID.
+   *
+   * @param value the stored list, or {@code null} if the key is absent
+   * @param target the set that receives the items
+   */
+  private static void addCommaSeparated(final String value, final Set<String> target) {
+    if (value == null) {
+      return;
+    }
+    for (final String item : value.split(",")) {
+      if (item.length() > 0) {
+        target.add(item);
+      }
+    }
+  }
+
+  /**
+   * Writes all settings to the properties file, replacing its previous
+   * content. The mother tongue is only written if one is set.
+   *
+   * @throws IOException if the file cannot be written
+   */
+  public void saveConfiguration() throws IOException {
+    final Properties props = new Properties();
+
+    props.setProperty(DISABLED_RULES_CONFIG_KEY, join(disabledRuleIds));
+    props.setProperty(ENABLED_RULES_CONFIG_KEY, join(enabledRuleIds));
+    props.setProperty(DISABLED_CATEGORIES_CONFIG_KEY, join(disabledCategoryNames));
+
+    if (motherTongue != null) {
+      props.setProperty(MOTHER_TONGUE_CONFIG_KEY, motherTongue.getShortName());
+    }
+    props.setProperty(SERVER_RUN_CONFIG_KEY, Boolean.toString(runServer));
+    props.setProperty(SERVER_PORT_CONFIG_KEY, Integer.toString(serverPort));
+
+    FileOutputStream fos = null;
+    try {
+      fos = new FileOutputStream(configFile);
+      props.store(fos, "LanguageTool configuration");
+    } finally {
+      if (fos != null) {
+        fos.close();
+      }
+    }
+  }
+
+  /**
+   * Joins the items with commas, in the set's iteration order.
+   *
+   * @param items the items to join, may be {@code null}
+   * @return the joined list; an empty string for {@code null} or an empty set
+   */
+  private static String join(final Set<String> items) {
+    if (items == null) {
+      return "";
+    }
+    final StringBuilder sb = new StringBuilder();
+    for (final Iterator<String> iter = items.iterator(); iter.hasNext();) {
+      sb.append(iter.next());
+      if (iter.hasNext()) {
+        sb.append(',');
+      }
+    }
+    return sb.toString();
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/ConfigurationDialog.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/ConfigurationDialog.java
new file mode 100644
index 0000000..d78ea08
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/ConfigurationDialog.java
@@ -0,0 +1,497 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+import java.awt.Container;
+import java.awt.Dimension;
+import java.awt.Frame;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Insets;
+import java.awt.Toolkit;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.KeyEvent;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import javax.swing.JButton;
+import javax.swing.JCheckBox;
+import javax.swing.JComboBox;
+import javax.swing.JComponent;
+import javax.swing.JDialog;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JRootPane;
+import javax.swing.JScrollPane;
+import javax.swing.JTextField;
+import javax.swing.KeyStroke;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.server.HTTPServer;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Dialog that offers the available rules so they can be turned on/off
+ * individually.
+ *
+ * @author Daniel Naber
+ */
+public class ConfigurationDialog implements ActionListener {
+
+ private static final String NO_MOTHER_TONGUE = "---";
+
+ private JButton okButton;
+ private JButton cancelButton;
+
+ private final ResourceBundle messages;
+ private JDialog dialog;
+
+ private JComboBox motherTongueBox;
+
+ private JCheckBox serverCheckbox;
+ private JTextField serverPortField;
+
+ private final List<JCheckBox> checkBoxes = new ArrayList<JCheckBox>();
+ private final List<String> checkBoxesRuleIds = new ArrayList<String>();
+ private final List<String> checkBoxesCategories = new ArrayList<String>();
+
+ private final List<String> defaultOffRules = new ArrayList<String>();
+
+ private Set<String> inactiveRuleIds = new HashSet<String>();
+ private Set<String> enabledRuleIds = new HashSet<String>();
+ private Set<String> inactiveCategoryNames = new HashSet<String>();
+ private final List<JCheckBox> categoryCheckBoxes = new ArrayList<JCheckBox>();
+ private final List<String> checkBoxesCategoryNames = new ArrayList<String>();
+ private Language motherTongue;
+ private boolean serverMode;
+ private int serverPort;
+
+ private final Frame owner;
+ private final boolean insideOOo;
+
+ /**
+  * Creates the dialog controller. The Swing dialog itself is only built when
+  * {@link #show(List)} is called.
+  *
+  * @param owner frame handed to the dialog as its owner
+  * @param insideOOo if <code>true</code>, the server check box and port field
+  *          are not created
+  */
+ public ConfigurationDialog(Frame owner, boolean insideOOo) {
+   this.messages = JLanguageTool.getMessageBundle();
+   this.insideOOo = insideOOo;
+   this.owner = owner;
+ }
+
+ /**
+  * Builds the modal configuration dialog for the given rules and displays it.
+  * <p>
+  * One check box is created per rule id (consecutive rules sharing an id are
+  * shown once), placed below a headline check box for its category. The
+  * dialog is modal, so the call returns once it has been hidden again.
+  * <p>
+  * Side effects: <code>rules</code> is sorted in place by category and
+  * description, and categories flagged as default-off are added to or removed
+  * from the set of inactive category names while the list is built.
+  *
+  * @param rules the rules to offer; every rule needs a non-null category
+  */
+ public void show(List<Rule> rules) {
+   dialog = new JDialog(owner, true);
+   dialog.setTitle(messages.getString("guiConfigWindowTitle"));
+   // All index-aligned lists are rebuilt below, so each of them has to start
+   // empty; otherwise a repeated call pairs check boxes with stale entries.
+   checkBoxes.clear();
+   checkBoxesRuleIds.clear();
+   checkBoxesCategories.clear();
+   categoryCheckBoxes.clear();
+   checkBoxesCategoryNames.clear();
+   defaultOffRules.clear();
+   // Guard against the sets having been replaced by null.
+   if (inactiveRuleIds == null) {
+     inactiveRuleIds = new HashSet<String>();
+   }
+   if (enabledRuleIds == null) {
+     enabledRuleIds = new HashSet<String>();
+   }
+   if (inactiveCategoryNames == null) {
+     inactiveCategoryNames = new HashSet<String>();
+   }
+
+   Collections.sort(rules, new CategoryComparator());
+
+   // close dialog when user presses Escape key:
+   final KeyStroke stroke = KeyStroke.getKeyStroke(KeyEvent.VK_ESCAPE, 0);
+   final ActionListener actionListener = new ActionListener() {
+     public void actionPerformed(@SuppressWarnings("unused") ActionEvent actionEvent) {
+       dialog.setVisible(false);
+     }
+   };
+   final JRootPane rootPane = dialog.getRootPane();
+   rootPane.registerKeyboardAction(actionListener, stroke,
+       JComponent.WHEN_IN_FOCUSED_WINDOW);
+
+   // JPanel
+   final JPanel checkBoxPanel = new JPanel();
+   checkBoxPanel.setLayout(new GridBagLayout());
+   GridBagConstraints cons = new GridBagConstraints();
+   cons.anchor = GridBagConstraints.NORTHWEST;
+   cons.gridx = 0;
+   int row = 0;
+   String prevID = null;
+   String prevCategory = null;
+   for (final Rule rule : rules) {
+     // avoid displaying rules from rule groups more than once:
+     if (prevID == null || !rule.getId().equals(prevID)) {
+       cons.gridy = row;
+       final String categoryName = rule.getCategory().getName();
+       final JCheckBox checkBox = new JCheckBox(rule.getDescription());
+       final boolean inactive = inactiveRuleIds.contains(rule.getId())
+           || inactiveCategoryNames.contains(categoryName);
+       checkBox.setSelected(!inactive);
+
+       // default-off rules stay unticked unless explicitly enabled
+       if (rule.isDefaultOff() && !enabledRuleIds.contains(rule.getId())) {
+         checkBox.setSelected(false);
+       }
+
+       // Note: this runs after the rule's own check box state was decided, so
+       // the change only affects the headline box and the rules that follow.
+       if (rule.isDefaultOff()) {
+         defaultOffRules.add(rule.getId());
+         if (rule.getCategory().isDefaultOff()) {
+           inactiveCategoryNames.add(categoryName);
+         }
+       } else {
+         if (rule.getCategory().isDefaultOff()) {
+           inactiveCategoryNames.remove(categoryName);
+         }
+       }
+
+       final ActionListener ruleCheckBoxListener = new ActionListener() {
+         public void actionPerformed(final ActionEvent actionEvent) {
+           // ticking a rule also ticks the headline box of its category
+           final JCheckBox cBox = (JCheckBox) actionEvent.getSource();
+           if (!cBox.getModel().isSelected()) {
+             return;
+           }
+           final int ruleNo = checkBoxes.indexOf(cBox);
+           if (ruleNo < 0) {
+             return;
+           }
+           final int catNo = checkBoxesCategoryNames
+               .indexOf(checkBoxesCategories.get(ruleNo));
+           if (catNo >= 0 && !categoryCheckBoxes.get(catNo).isSelected()) {
+             categoryCheckBoxes.get(catNo).setSelected(true);
+           }
+         }
+       };
+       checkBox.addActionListener(ruleCheckBoxListener);
+       checkBoxes.add(checkBox);
+       checkBoxesRuleIds.add(rule.getId());
+       checkBoxesCategories.add(categoryName);
+       // start a new headline whenever the category changes
+       if (!categoryName.equals(prevCategory)) {
+
+         // TODO: maybe use a Tree of Checkboxes here, like in:
+         // http://www.javaworld.com/javaworld/jw-09-2007/jw-09-checkboxtree.html
+         final JCheckBox categoryCheckBox = new JCheckBox(categoryName);
+         categoryCheckBox.setSelected(!inactiveCategoryNames
+             .contains(categoryName));
+
+         final ActionListener categoryCheckBoxListener = new ActionListener() {
+           public void actionPerformed(final ActionEvent actionEvent) {
+             // (un)ticking a headline applies the same state to its rules
+             final JCheckBox cBox = (JCheckBox) actionEvent.getSource();
+             final boolean selected = cBox.getModel().isSelected();
+             int i = 0;
+             for (final JCheckBox ruleBox : checkBoxes) {
+               if (ruleBox.isSelected() != selected) {
+                 if (checkBoxesCategories.get(i).equals(cBox.getText())) {
+                   ruleBox.setSelected(selected);
+                 }
+               }
+               i++;
+             }
+           }
+         };
+
+         categoryCheckBox.addActionListener(categoryCheckBoxListener);
+         categoryCheckBoxes.add(categoryCheckBox);
+         checkBoxesCategoryNames.add(categoryName);
+         checkBoxPanel.add(categoryCheckBox, cons);
+         prevCategory = categoryName;
+         cons.gridy++;
+         row++;
+       }
+       checkBox.setMargin(new Insets(0, 20, 0, 0)); // indent
+       checkBoxPanel.add(checkBox, cons);
+       row++;
+     }
+     prevID = rule.getId();
+   }
+
+   final JPanel motherTonguePanel = new JPanel();
+   motherTonguePanel.add(new JLabel(messages.getString("guiMotherTongue")),
+       cons);
+   motherTongueBox = new JComboBox(getPossibleMotherTongues());
+   if (motherTongue != null) {
+     if (motherTongue == Language.DEMO) {
+       motherTongueBox.setSelectedItem(NO_MOTHER_TONGUE);
+     } else {
+       motherTongueBox.setSelectedItem(messages.getString(motherTongue
+           .getShortName()));
+     }
+   }
+   motherTonguePanel.add(motherTongueBox, cons);
+
+   final JPanel portPanel = new JPanel();
+   portPanel.setLayout(new GridBagLayout());
+   // TODO: why is this now left-aligned?!?!
+   cons = new GridBagConstraints();
+   cons.insets = new Insets(0, 4, 0, 0);
+   cons.gridx = 0;
+   cons.gridy = 0;
+   cons.anchor = GridBagConstraints.WEST;
+   cons.fill = GridBagConstraints.NONE;
+   cons.weightx = 0.0f;
+   if (!insideOOo) {
+     serverCheckbox = new JCheckBox(StringTools.getLabel(messages
+         .getString("guiRunOnPort")));
+     serverCheckbox.setMnemonic(StringTools.getMnemonic(messages
+         .getString("guiRunOnPort")));
+     serverCheckbox.setSelected(serverMode);
+     portPanel.add(serverCheckbox, cons);
+     serverPortField = new JTextField(Integer.toString(serverPort));
+     serverPortField.setEnabled(serverCheckbox.isSelected());
+     // TODO: without this the box is just a few pixels small, but why??:
+     serverPortField.setMinimumSize(new Dimension(100, 25));
+     cons.gridx = 1;
+     serverCheckbox.addActionListener(new ActionListener() {
+       public void actionPerformed(@SuppressWarnings("unused") ActionEvent e) {
+         serverPortField.setEnabled(serverCheckbox.isSelected());
+       }
+     });
+     portPanel.add(serverPortField, cons);
+   }
+
+   final JPanel buttonPanel = new JPanel();
+   buttonPanel.setLayout(new GridBagLayout());
+   okButton = new JButton(StringTools.getLabel(messages
+       .getString("guiOKButton")));
+   okButton.setMnemonic(StringTools.getMnemonic(messages
+       .getString("guiOKButton")));
+   okButton.addActionListener(this);
+   cancelButton = new JButton(StringTools.getLabel(messages
+       .getString("guiCancelButton")));
+   cancelButton.setMnemonic(StringTools.getMnemonic(messages
+       .getString("guiCancelButton")));
+   cancelButton.addActionListener(this);
+   cons = new GridBagConstraints();
+   cons.insets = new Insets(0, 4, 0, 0);
+   buttonPanel.add(okButton, cons);
+   buttonPanel.add(cancelButton, cons);
+
+   final Container contentPane = dialog.getContentPane();
+   contentPane.setLayout(new GridBagLayout());
+   cons = new GridBagConstraints();
+   cons.insets = new Insets(4, 4, 4, 4);
+   cons.gridx = 0;
+   cons.gridy = 0;
+   cons.weightx = 10.0f;
+   cons.weighty = 10.0f;
+   cons.fill = GridBagConstraints.BOTH;
+   contentPane.add(new JScrollPane(checkBoxPanel), cons);
+
+   cons.gridx = 0;
+   cons.gridy = 1;
+   cons.weightx = 0.0f;
+   cons.weighty = 0.0f;
+   cons.fill = GridBagConstraints.NONE;
+   cons.anchor = GridBagConstraints.WEST;
+   contentPane.add(motherTonguePanel, cons);
+
+   cons.gridx = 0;
+   cons.gridy = 2;
+   cons.weightx = 0.0f;
+   cons.weighty = 0.0f;
+   cons.fill = GridBagConstraints.NONE;
+   cons.anchor = GridBagConstraints.WEST;
+   contentPane.add(portPanel, cons);
+
+   cons.gridx = 0;
+   cons.gridy = 3;
+   cons.weightx = 0.0f;
+   cons.weighty = 0.0f;
+   cons.fill = GridBagConstraints.NONE;
+   cons.anchor = GridBagConstraints.EAST;
+   contentPane.add(buttonPanel, cons);
+
+   dialog.pack();
+   dialog.setSize(500, 500);
+   // center on screen:
+   final Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
+   final Dimension frameSize = dialog.getSize();
+   dialog.setLocation(screenSize.width / 2 - frameSize.width / 2,
+       screenSize.height / 2 - frameSize.height / 2);
+   dialog.setVisible(true);
+ }
+
+ /**
+  * Collects the entries for the mother tongue combo box: the "no mother
+  * tongue" placeholder followed by the localized name of every language
+  * except the demo language.
+  */
+ private Object[] getPossibleMotherTongues() {
+   final List<Object> names = new ArrayList<Object>();
+   names.add(NO_MOTHER_TONGUE);
+   for (final Language language : Language.LANGUAGES) {
+     if (language == Language.DEMO) {
+       continue;
+     }
+     final String localizedName = messages.getString(language.getShortName());
+     names.add(localizedName);
+   }
+   return names.toArray();
+ }
+
+ /**
+  * Handles the OK and Cancel buttons.
+  * <p>
+  * OK copies the state of the check boxes, the mother tongue box and (when
+  * present) the server controls into this object and hides the dialog.
+  * Cancel only hides the dialog. Events from other sources are ignored.
+  * <p>
+  * If the port field does not contain an integer, nothing is stored: the
+  * field is reset to the last valid port and the dialog stays open.
+  */
+ public void actionPerformed(ActionEvent e) {
+   if (e.getSource() == okButton) {
+     // Validate the port first so that a bad value cannot leave the rule
+     // sets half-updated or throw on the event dispatch thread.
+     int newPort = serverPort;
+     if (serverCheckbox != null) {
+       try {
+         newPort = Integer.parseInt(serverPortField.getText().trim());
+       } catch (final NumberFormatException ex) {
+         serverPortField.setText(Integer.toString(serverPort));
+         serverPortField.requestFocusInWindow();
+         return;
+       }
+       // keep the field in a form that can be parsed again later
+       serverPortField.setText(Integer.toString(newPort));
+     }
+
+     inactiveCategoryNames.clear();
+     for (int i = 0; i < categoryCheckBoxes.size(); i++) {
+       if (!categoryCheckBoxes.get(i).isSelected()) {
+         inactiveCategoryNames.add(checkBoxesCategoryNames.get(i));
+       }
+     }
+
+     inactiveRuleIds.clear();
+     enabledRuleIds.clear();
+     for (int i = 0; i < checkBoxes.size(); i++) {
+       final String ruleId = checkBoxesRuleIds.get(i);
+       final boolean defaultOff = defaultOffRules.contains(ruleId);
+       if (checkBoxes.get(i).isSelected()) {
+         // only deviations from the default are recorded
+         if (defaultOff) {
+           enabledRuleIds.add(ruleId);
+         }
+       } else if (!defaultOff) {
+         inactiveRuleIds.add(ruleId);
+       }
+     }
+
+     final Object selected = motherTongueBox.getSelectedItem();
+     if (selected instanceof String) {
+       motherTongue = getLanguageForLocalizedName(selected.toString());
+     } else {
+       motherTongue = (Language) selected;
+     }
+     if (serverCheckbox != null) {
+       serverMode = serverCheckbox.isSelected();
+       serverPort = newPort;
+     }
+     dialog.setVisible(false);
+   } else if (e.getSource() == cancelButton) {
+     dialog.setVisible(false);
+   }
+ }
+
+ /**
+  * Sets the ids of the rules that are shown as switched off. The set is kept
+  * by reference; it is cleared and refilled when the user confirms with OK.
+  *
+  * @param ruleIDs rule ids; <code>null</code> is treated as an empty set so
+  *          that later <code>contains</code>/<code>clear</code> calls are safe
+  */
+ public void setDisabledRules(Set<String> ruleIDs) {
+   inactiveRuleIds = ruleIDs != null ? ruleIDs : new HashSet<String>();
+ }
+
+ /**
+  * Returns the set of disabled rule ids: what was passed to
+  * {@link #setDisabledRules(Set)}, updated on OK with the unticked rules
+  * that are not off by default.
+  */
+ public Set<String> getDisabledRuleIds() {
+   return inactiveRuleIds;
+ }
+
+ /**
+  * Sets the ids of default-off rules that are shown as switched on. The set
+  * is kept by reference; it is cleared and refilled when the user confirms
+  * with OK.
+  *
+  * @param ruleIDs rule ids; <code>null</code> is treated as an empty set so
+  *          that later <code>contains</code>/<code>clear</code> calls are safe
+  */
+ public void setEnabledRules(Set<String> ruleIDs) {
+   enabledRuleIds = ruleIDs != null ? ruleIDs : new HashSet<String>();
+ }
+
+ /**
+  * Returns the set of explicitly enabled rule ids: what was passed to
+  * {@link #setEnabledRules(Set)}, updated on OK with the ticked rules that
+  * are off by default.
+  */
+ public Set<String> getEnabledRuleIds() {
+   return enabledRuleIds;
+ }
+
+ /**
+  * Sets the names of the categories that are shown as switched off. The set
+  * is kept by reference; it is modified while the dialog is built and
+  * refilled when the user confirms with OK.
+  *
+  * @param categoryNames category names; <code>null</code> is treated as an
+  *          empty set so that later <code>contains</code>/<code>add</code>
+  *          calls are safe
+  */
+ public void setDisabledCategories(Set<String> categoryNames) {
+   inactiveCategoryNames = categoryNames != null ? categoryNames
+       : new HashSet<String>();
+ }
+
+ /**
+  * Returns the set of disabled category names: what was passed to
+  * {@link #setDisabledCategories(Set)}, updated on OK with the categories
+  * whose headline check box is unticked.
+  */
+ public Set<String> getDisabledCategoryNames() {
+   return inactiveCategoryNames;
+ }
+
+ /**
+ * Sets the mother tongue that is preselected when the dialog is shown.
+ * <code>Language.DEMO</code> selects the "no mother tongue" entry.
+ */
+ public void setMotherTongue(Language motherTongue) {
+ this.motherTongue = motherTongue;
+ }
+
+ /**
+ * Returns the mother tongue: the value that was set, or the selection taken
+ * from the combo box the last time OK was pressed. May be <code>null</code>.
+ */
+ public Language getMotherTongue() {
+ return motherTongue;
+ }
+
+ /**
+  * Get the Language object for the given localized language name.
+  *
+  * @param languageName
+  *          e.g. <code>English</code> or <code>German</code> (case is
+  *          significant), or the "no mother tongue" placeholder
+  * @return <code>Language.DEMO</code> for the placeholder, the matching
+  *         Language object otherwise, or <code>null</code> if there is no
+  *         match or the name is <code>null</code>
+  */
+ private Language getLanguageForLocalizedName(final String languageName) {
+   if (languageName == null) {
+     return null;
+   }
+   // Independent of the language list, so decide it before looping.
+   if (NO_MOTHER_TONGUE.equals(languageName)) {
+     return Language.DEMO;
+   }
+   for (final Language element : Language.LANGUAGES) {
+     if (languageName.equals(messages.getString(element.getShortName()))) {
+       return element;
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Sets the initial state of the "run as server" check box used the next
+  * time the dialog is built.
+  */
+ public void setRunServer(boolean serverMode) {
+   this.serverMode = serverMode;
+ }
+
+ /**
+  * Returns the current state of the server check box, or <code>false</code>
+  * if that check box has not been created.
+  */
+ public boolean getRunServer() {
+   return serverCheckbox != null && serverCheckbox.isSelected();
+ }
+
+ /**
+  * Sets the port shown in the port field the next time the dialog is built.
+  */
+ public void setServerPort(int serverPort) {
+   this.serverPort = serverPort;
+ }
+
+ /**
+  * Returns the port currently entered in the port field.
+  *
+  * @return the parsed field content; <code>HTTPServer.DEFAULT_PORT</code> if
+  *         the field has not been created; the last stored port if the field
+  *         content is not an integer
+  */
+ public int getServerPort() {
+   if (serverPortField == null) {
+     return HTTPServer.DEFAULT_PORT;
+   }
+   try {
+     return Integer.parseInt(serverPortField.getText().trim());
+   } catch (final NumberFormatException e) {
+     // free-text field: fall back instead of failing in the caller
+     return serverPort;
+   }
+ }
+
+ /**
+  * Opens the dialog with the English rules - for internal testing only.
+  */
+ public static void main(String[] args) throws IOException {
+   final JLanguageTool lt = new JLanguageTool(Language.ENGLISH);
+   lt.activateDefaultPatternRules();
+   // copy, because show() sorts the list it is given
+   final List<Rule> rules = new ArrayList<Rule>(lt.getAllRules());
+   final ConfigurationDialog dlg = new ConfigurationDialog(null, false);
+   dlg.show(rules);
+ }
+
+}
+
+/**
+ * Orders rules by category name (case-sensitive) and, within a category, by
+ * description (ignoring case). If either rule has no category, only the
+ * descriptions are compared.
+ */
+class CategoryComparator implements Comparator<Rule> {
+
+  public int compare(final Rule r1, final Rule r2) {
+    if (r1.getCategory() != null && r2.getCategory() != null) {
+      final String firstName = r1.getCategory().getName();
+      final String secondName = r2.getCategory().getName();
+      final int byCategory = firstName.compareTo(secondName);
+      if (byCategory != 0) {
+        return byCategory;
+      }
+    }
+    // same category, or at least one rule without a category
+    return r1.getDescription().compareToIgnoreCase(r2.getDescription());
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/LanguageManagerDialog.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/LanguageManagerDialog.java
new file mode 100644
index 0000000..18c5b26
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/LanguageManagerDialog.java
@@ -0,0 +1,184 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+import java.awt.Container;
+import java.awt.Dimension;
+import java.awt.Frame;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Insets;
+import java.awt.Toolkit;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.KeyEvent;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.swing.JButton;
+import javax.swing.JComponent;
+import javax.swing.JDialog;
+import javax.swing.JList;
+import javax.swing.JPanel;
+import javax.swing.JRootPane;
+import javax.swing.JScrollPane;
+import javax.swing.KeyStroke;
+import javax.swing.filechooser.FileFilter;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.language.LanguageBuilder;
+
+/**
+ * Dialog for managing externally loaded rules.
+ *
+ * @author Daniel Naber
+ */
+public class LanguageManagerDialog implements ActionListener {
+
+ private JDialog dialog;
+
+ private JList list;
+ private JButton addButton;
+ private JButton removeButton;
+ private JButton closeButton;
+ private final List<File> ruleFiles = new ArrayList<File>();
+
+ private final Frame owner;
+ //private ResourceBundle messages = null;
+
+ public LanguageManagerDialog(Frame owner, List<Language> languages) {
+ this.owner = owner;
+ for (Language lang : languages) {
+ ruleFiles.add(new File(lang.getRuleFileName()));
+ }
+ //messages = JLanguageTool.getMessageBundle();
+ }
+
+ /**
+ * Builds the modal dialog (file list on the left, Add/Remove/Close buttons
+ * on the right), centers it on the screen and makes it visible.
+ */
+ public void show() {
+ dialog = new JDialog(owner, true);
+ dialog.setTitle("Language Module Manager"); // FIXME: i18n
+
+ // close dialog when user presses Escape key:
+ // TODO: taken from ConfigurationDialog, avoid duplication:
+ final KeyStroke stroke = KeyStroke.getKeyStroke(KeyEvent.VK_ESCAPE, 0);
+ final ActionListener actionListener = new ActionListener() {
+ @SuppressWarnings("unused")
+ public void actionPerformed(ActionEvent actionEvent) {
+ dialog.setVisible(false);
+ }
+ };
+ final JRootPane rootPane = dialog.getRootPane();
+ rootPane.registerKeyboardAction(actionListener, stroke, JComponent.WHEN_IN_FOCUSED_WINDOW);
+
+ final Container contentPane = dialog.getContentPane();
+ contentPane.setLayout(new GridBagLayout());
+
+ // the list of rule files takes all extra space
+ list = new JList(ruleFiles.toArray(new File[]{}));
+ GridBagConstraints cons = new GridBagConstraints();
+ cons.insets = new Insets(4, 4, 4, 4);
+ cons.gridx = 0;
+ cons.gridy = 0;
+ cons.fill = GridBagConstraints.BOTH;
+ cons.weightx = 2.0f;
+ cons.weighty = 2.0f;
+ contentPane.add(new JScrollPane(list), cons);
+
+ // constraints shared by the three buttons; only gridy changes per button
+ cons = new GridBagConstraints();
+ cons.insets = new Insets(4, 4, 4, 4);
+ cons.fill = GridBagConstraints.HORIZONTAL;
+
+ final JPanel buttonPanel = new JPanel();
+ buttonPanel.setLayout(new GridBagLayout());
+ addButton = new JButton("Add..."); // FIXME: i18n
+ addButton.addActionListener(this);
+ cons.gridx = 1;
+ cons.gridy = 0;
+ buttonPanel.add(addButton, cons);
+
+ removeButton = new JButton("Remove"); // FIXME: i18n
+ removeButton.addActionListener(this);
+ cons.gridx = 1;
+ cons.gridy = 1;
+ buttonPanel.add(removeButton, cons);
+
+ closeButton = new JButton("Close"); // FIXME: i18n
+ closeButton.addActionListener(this);
+ cons.gridx = 1;
+ cons.gridy = 2;
+ buttonPanel.add(closeButton, cons);
+
+ // NOTE(review): the next two assignments have no effect because cons is
+ // replaced right after them; the button panel is added with default grid
+ // position values. Possibly the order was meant to be reversed - confirm.
+ cons.gridx = 1;
+ cons.gridy = 0;
+ cons = new GridBagConstraints();
+ cons.anchor = GridBagConstraints.NORTH;
+ contentPane.add(buttonPanel, cons);
+
+ dialog.pack();
+ dialog.setSize(300, 200);
+ // center on screen:
+ final Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
+ final Dimension frameSize = dialog.getSize();
+ dialog.setLocation(screenSize.width/2 - (frameSize.width/2), screenSize.height/2 - (frameSize.height/2));
+ dialog.setVisible(true);
+ }
+
+ /**
+  * Handles the Add, Remove and Close buttons.
+  * <p>
+  * Add asks for an XML rule file and appends it to the list unless no file
+  * was chosen or the file is already listed. Remove deletes the selected
+  * entry, if any. Close hides the dialog.
+  *
+  * @throws IllegalArgumentException if the event comes from any other source
+  */
+ public void actionPerformed(ActionEvent e) {
+   final Object source = e.getSource();
+   if (source == addButton) {
+     final File ruleFile = Tools.openFileDialog(null, new XMLFileFilter());
+     // The file dialog may hand back null when nothing was chosen; a null
+     // entry would later be passed on to LanguageBuilder.makeLanguage().
+     if (ruleFile != null && !ruleFiles.contains(ruleFile)) {
+       ruleFiles.add(ruleFile);
+       list.setListData(ruleFiles.toArray(new File[]{}));
+     }
+   } else if (source == removeButton) {
+     final int selectedIndex = list.getSelectedIndex();
+     if (selectedIndex != -1) {
+       ruleFiles.remove(selectedIndex);
+       list.setListData(ruleFiles.toArray(new File[]{}));
+     }
+   } else if (source == closeButton) {
+     dialog.setVisible(false);
+   } else {
+     throw new IllegalArgumentException("Don't know how to handle " + e);
+   }
+ }
+
+ /**
+  * Return all external Languages: one Language built from each rule file
+  * currently in the list, in list order.
+  */
+ List<Language> getLanguages() {
+   final List<Language> result = new ArrayList<Language>();
+   for (final File file : ruleFiles) {
+     result.add(LanguageBuilder.makeLanguage(file));
+   }
+   return result;
+ }
+
+ /**
+  * File chooser filter that accepts directories and files whose name ends
+  * with ".xml" (compared in lower case).
+  */
+ static class XMLFileFilter extends FileFilter {
+   public boolean accept(final File f) {
+     final String lowerCaseName = f.getName().toLowerCase();
+     return lowerCaseName.endsWith(".xml") || f.isDirectory();
+   }
+   public String getDescription() {
+     return "*.xml";
+   }
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Main.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Main.java
new file mode 100644
index 0000000..eb73813
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Main.java
@@ -0,0 +1,738 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+import java.awt.AWTException;
+import java.awt.Container;
+import java.awt.GridBagConstraints;
+import java.awt.GridBagLayout;
+import java.awt.Image;
+import java.awt.Insets;
+import java.awt.MenuItem;
+import java.awt.PopupMenu;
+import java.awt.Toolkit;
+import java.awt.datatransfer.Clipboard;
+import java.awt.datatransfer.DataFlavor;
+import java.awt.datatransfer.Transferable;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.MouseEvent;
+import java.awt.event.MouseListener;
+import java.awt.event.WindowEvent;
+import java.awt.event.WindowListener;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import javax.swing.Icon;
+import javax.swing.ImageIcon;
+import javax.swing.JButton;
+import javax.swing.JComboBox;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JOptionPane;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
+import javax.swing.JSplitPane;
+import javax.swing.JTextArea;
+import javax.swing.JTextPane;
+import javax.swing.UIManager;
+import javax.swing.WindowConstants;
+import javax.swing.filechooser.FileFilter;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.jdesktop.jdic.tray.SystemTray;
+import org.jdesktop.jdic.tray.TrayIcon;
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.language.RuleFilenameException;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.server.HTTPServer;
+import de.danielnaber.languagetool.server.PortBindingException;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A simple GUI to check texts with.
+ *
+ * @author Daniel Naber
+ */
+public final class Main implements ActionListener {
+
+ private final ResourceBundle messages;
+
+ private static final String HTML_FONT_START = "<font face='Arial,Helvetica'>";
+ private static final String HTML_FONT_END = "</font>";
+
+ private final static String SYSTEM_TRAY_ICON_NAME = "/TrayIcon.png";
+ private static final String SYSTEM_TRAY_TOOLTIP = "LanguageTool";
+ private static final String CONFIG_FILE = ".languagetool.cfg";
+
+ private final Configuration config;
+
+ private JFrame frame;
+ private JTextArea textArea;
+ private JTextPane resultArea;
+ private JComboBox langBox;
+
+ private HTTPServer httpServer;
+
+ private final Map<Language, ConfigurationDialog> configDialogs = new HashMap<Language, ConfigurationDialog>();
+
+ // whether clicking on the window close button hides to system tray:
+ private boolean trayMode;
+
+ private boolean isInTray;
+
+ private Main() throws IOException {
+ config = new Configuration(new File(System.getProperty("user.home")),
+ CONFIG_FILE);
+ messages = JLanguageTool.getMessageBundle();
+ maybeStartServer();
+ }
+
+ /**
+ * Builds the main window: a split pane with the text input area on top and
+ * the HTML result area below, followed by a label and a panel holding the
+ * check button and the language combo box. The frame is not made visible
+ * here.
+ */
+ private void createGUI() {
+ frame = new JFrame("LanguageTool " + JLanguageTool.VERSION);
+
+ // switch to the "Nimbus" look and feel if it is installed; any failure is
+ // deliberately ignored and the current look and feel stays in place
+ try {
+ for (UIManager.LookAndFeelInfo info : UIManager
+ .getInstalledLookAndFeels()) {
+ if ("Nimbus".equals(info.getName())) {
+ UIManager.setLookAndFeel(info.getClassName());
+ break;
+ }
+ }
+ } catch (Exception ex) {
+ // Well, what can we do...
+ }
+
+ // closing is left entirely to the registered window listener
+ frame.setDefaultCloseOperation(WindowConstants.DO_NOTHING_ON_CLOSE);
+ frame.addWindowListener(new CloseListener());
+ frame.setIconImage(new ImageIcon(JLanguageTool.getDataBroker().getFromResourceDirAsUrl(
+ Main.SYSTEM_TRAY_ICON_NAME)).getImage());
+ frame.setJMenuBar(new MainMenuBar(this, messages));
+
+ textArea = new JTextArea(messages.getString("guiDemoText"));
+ // TODO: wrong line number is displayed for lines that are wrapped
+ // automatically:
+ textArea.setLineWrap(true);
+ textArea.setWrapStyleWord(true);
+ resultArea = new JTextPane();
+ resultArea.setContentType("text/html");
+ resultArea.setText(HTML_FONT_START + messages.getString("resultAreaText")
+ + HTML_FONT_END);
+ resultArea.setEditable(false);
+ final JLabel label = new JLabel(messages.getString("enterText"));
+ final JButton button = new JButton(StringTools.getLabel(messages
+ .getString("checkText")));
+ button
+ .setMnemonic(StringTools.getMnemonic(messages.getString("checkText")));
+ button.addActionListener(this);
+
+ // one row: check button, "text language" label, language combo box
+ final JPanel panel = new JPanel();
+ panel.setLayout(new GridBagLayout());
+ final GridBagConstraints buttonCons = new GridBagConstraints();
+ buttonCons.gridx = 0;
+ buttonCons.gridy = 0;
+ panel.add(button, buttonCons);
+ buttonCons.gridx = 1;
+ buttonCons.gridy = 0;
+ panel.add(new JLabel(" " + messages.getString("textLanguage") + " "),
+ buttonCons);
+ buttonCons.gridx = 2;
+ buttonCons.gridy = 0;
+ langBox = new JComboBox();
+ populateLanguageBox();
+ // use the system default language to preselect the language from the combo
+ // box:
+ try {
+ final Locale defaultLocale = Locale.getDefault();
+ langBox.setSelectedItem(messages.getString(defaultLocale.getLanguage()));
+ } catch (final MissingResourceException e) {
+ // language not supported, so don't select a default
+ }
+ panel.add(langBox, buttonCons);
+
+ final Container contentPane = frame.getContentPane();
+ final GridBagLayout gridLayout = new GridBagLayout();
+ contentPane.setLayout(gridLayout);
+ final GridBagConstraints cons = new GridBagConstraints();
+ cons.insets = new Insets(5, 5, 5, 5);
+ cons.fill = GridBagConstraints.BOTH;
+ cons.weightx = 10.0f;
+ // weighty is assigned twice; the later value (5.0f) is the one in effect
+ cons.weighty = 10.0f;
+ cons.gridx = 0;
+ cons.gridy = 1;
+ cons.weighty = 5.0f;
+ final JSplitPane splitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT,
+ new JScrollPane(textArea), new JScrollPane(resultArea));
+ splitPane.setDividerLocation(200);
+ contentPane.add(splitPane, cons);
+
+ // the label and the button row keep their preferred size (no fill, no
+ // extra vertical space)
+ cons.fill = GridBagConstraints.NONE;
+ cons.gridx = 0;
+ cons.gridy = 2;
+ cons.weighty = 0.0f;
+ cons.insets = new Insets(3, 3, 3, 3);
+ // cons.fill = GridBagConstraints.NONE;
+ contentPane.add(label, cons);
+ cons.gridy = 3;
+ contentPane.add(panel, cons);
+
+ frame.pack();
+ frame.setSize(600, 550);
+ }
+
+ /**
+  * Refills the language combo box with the alphabetically sorted display
+  * names of all languages except the demo language. The localized name is
+  * used where the message bundle has one, the language's own name otherwise.
+  */
+ private void populateLanguageBox() {
+   final List<String> names = new ArrayList<String>();
+   langBox.removeAllItems();
+   for (final Language lang : Language.LANGUAGES) {
+     if (lang == Language.DEMO) {
+       continue;
+     }
+     String displayName;
+     try {
+       displayName = messages.getString(lang.getShortName());
+     } catch (final MissingResourceException e) {
+       // can happen with external rules:
+       displayName = lang.getName();
+     }
+     names.add(displayName);
+   }
+   Collections.sort(names);
+   for (final String displayName : names) {
+     langBox.addItem(displayName);
+   }
+ }
+
+ /** Makes the main window visible. */
+ private void showGUI() {
+ frame.setVisible(true);
+ }
+
+ /**
+  * Runs the check when the event's action command is the label of the
+  * "check text" button. Any exception, including the one raised for an
+  * unknown action, is shown in an error dialog instead of being propagated.
+  */
+ public void actionPerformed(final ActionEvent e) {
+   try {
+     final boolean isCheckAction = e.getActionCommand().equals(
+         StringTools.getLabel(messages.getString("checkText")));
+     if (!isCheckAction) {
+       throw new IllegalArgumentException("Unknown action " + e);
+     }
+     final JLanguageTool langTool = getCurrentLanguageTool();
+     checkTextAndDisplayResults(langTool, getCurrentLanguage());
+   } catch (final Exception exc) {
+     Tools.showError(exc);
+   }
+ }
+
+ /**
+  * Lets the user pick a plain text file, loads it into the text area and
+  * checks it right away. Does nothing if no file was chosen; I/O errors are
+  * shown in an error dialog.
+  */
+ void loadFile() {
+   final File file = Tools.openFileDialog(frame, new PlainTextFilter());
+   if (file == null) {
+     return;
+   }
+   try {
+     final FileInputStream in = new FileInputStream(file.getAbsolutePath());
+     final String fileContents;
+     try {
+       fileContents = StringTools.readFile(in);
+     } finally {
+       // Release the file handle here; whether readFile() closes the stream
+       // is not visible from this code, and closing twice is harmless.
+       in.close();
+     }
+     textArea.setText(fileContents);
+     final JLanguageTool langTool = getCurrentLanguageTool();
+     checkTextAndDisplayResults(langTool, getCurrentLanguage());
+   } catch (final IOException e) {
+     Tools.showError(e);
+   }
+ }
+
+ /**
+  * Hides the main window and, on the first call, installs the tray icon.
+  * <p>
+  * On a Java 1.5 runtime the JDIC tray classes are used, otherwise
+  * <code>java.awt.SystemTray</code>. If the icon cannot be added to the
+  * system tray, the error is shown and the window stays visible, so the
+  * application does not become unreachable.
+  *
+  * @throws MissingJdicException if the JDIC classes are needed but missing
+  */
+ void hideToTray() {
+   if (!isInTray) {
+     final String version = System.getProperty("java.version");
+     if (version.startsWith("1.5")) { // we don't run under <= 1.4,
+       // so we don't check for that
+       TrayIcon trayIcon = null;
+       try {
+         final Icon sysTrayIcon = new ImageIcon(JLanguageTool.getDataBroker().getFromResourceDirAsUrl(Main.SYSTEM_TRAY_ICON_NAME));
+         trayIcon = new TrayIcon(sysTrayIcon);
+       } catch (final NoClassDefFoundError e) {
+         throw new MissingJdicException(e);
+       }
+       final SystemTray tray = SystemTray.getDefaultSystemTray();
+       trayIcon.addActionListener(new TrayActionListener());
+       trayIcon.setToolTip(SYSTEM_TRAY_TOOLTIP);
+       tray.addTrayIcon(trayIcon);
+     } else {
+       // Java 1.6 or later
+       final java.awt.SystemTray tray = java.awt.SystemTray.getSystemTray();
+       final Image img = Toolkit.getDefaultToolkit().getImage(
+           JLanguageTool.getDataBroker().getFromResourceDirAsUrl(Main.SYSTEM_TRAY_ICON_NAME));
+       final PopupMenu popup = makePopupMenu();
+       try {
+         final java.awt.TrayIcon trayIcon = new java.awt.TrayIcon(img,
+             "tooltip", popup);
+         trayIcon.addMouseListener(new TrayActionListener());
+         trayIcon.setToolTip(SYSTEM_TRAY_TOOLTIP);
+         tray.add(trayIcon);
+       } catch (final AWTException e1) {
+         // thrown if there's no system tray; without an icon the window
+         // must not be hidden, so stop here
+         Tools.showError(e1);
+         return;
+       }
+     }
+   }
+   isInTray = true;
+   frame.setVisible(false);
+ }
+
+ /**
+  * Build the popup menu of the tray icon. All entries share one
+  * {@link TrayActionRMBListener}, which tells them apart by their label.
+  */
+ private PopupMenu makePopupMenu() {
+   final PopupMenu trayMenu = new PopupMenu();
+   final ActionListener menuListener = new TrayActionRMBListener();
+   // message keys in display order: check clipboard text, open main window, exit
+   final String[] labelKeys = {
+       "guiMenuCheckClipboard", "guiMenuShowMainWindow", "guiMenuQuit" };
+   for (final String labelKey : labelKeys) {
+     final MenuItem entry = new MenuItem(
+         StringTools.getLabel(messages.getString(labelKey)));
+     entry.addActionListener(menuListener);
+     trayMenu.add(entry);
+   }
+   return trayMenu;
+ }
+
+ /**
+  * Show the language manager dialog, re-initialize the languages with the
+  * dialog's language list and refresh the language box. A
+  * RuleFilenameException is reported to the user by its message only.
+  */
+ void addLanguage() {
+   final LanguageManagerDialog dialog =
+       new LanguageManagerDialog(frame, Language.getExternalLanguages());
+   dialog.show();
+   try {
+     Language.reInit(dialog.getLanguages());
+   } catch (final RuleFilenameException badRuleFile) {
+     Tools.showErrorMessage(badRuleFile);
+   }
+   populateLanguageBox();
+ }
+
+ /**
+  * Show the configuration dialog for the current language, copy the
+  * dialog's settings into the configuration and restart the server
+  * (it is only started again if the configuration asks for it).
+  */
+ void showOptions() {
+   final List<Rule> allRules = getCurrentLanguageTool().getAllRules();
+   final ConfigurationDialog dialog = getCurrentConfigDialog();
+   // this blocks until OK/Cancel is clicked in the dialog
+   dialog.show(allRules);
+   // take over the rule settings ...
+   config.setDisabledRuleIds(dialog.getDisabledRuleIds());
+   config.setEnabledRuleIds(dialog.getEnabledRuleIds());
+   config.setDisabledCategoryNames(dialog.getDisabledCategoryNames());
+   config.setMotherTongue(dialog.getMotherTongue());
+   // ... and the server settings
+   config.setRunServer(dialog.getRunServer());
+   config.setServerPort(dialog.getServerPort());
+   // Stop server, start new server if requested:
+   stopServer();
+   maybeStartServer();
+ }
+
+ // Make the main window visible again. Only the frame's visibility is
+ // changed here; the isInTray flag is not touched.
+ private void restoreFromTray() {
+ frame.setVisible(true);
+ }
+
+ // Bring the GUI back and check the text from clipboard/selection.
+ // The clipboard is read before the window is shown again.
+ private void restoreFromTrayAndCheck() {
+   final String clipboardText = getClipboardText();
+   restoreFromTray();
+   textArea.setText(clipboardText);
+   checkTextAndDisplayResults(getCurrentLanguageTool(), getCurrentLanguage());
+ }
+
+ /**
+  * Replace the text area's content with the text from the
+  * clipboard/selection and check it with the current language.
+  */
+ void checkClipboardText() {
+   textArea.setText(getClipboardText());
+   checkTextAndDisplayResults(getCurrentLanguageTool(), getCurrentLanguage());
+ }
+
+ /**
+  * Get the text from the system selection or, if there is no system
+  * selection (e.g. on Windows), from the system clipboard.
+  *
+  * @return the text, or an empty string if the clipboard has no content or
+  *   no plain unicode text; if reading the text fails, the exception is
+  *   printed and the string representation of the content is returned
+  */
+ private String getClipboardText() {
+   // get text from clipboard or selection:
+   Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemSelection();
+   if (clipboard == null) { // on Windows
+     clipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
+   }
+   final Transferable data = clipboard.getContents(this);
+   if (data == null) {
+     return "";
+   }
+   try {
+     final DataFlavor flavor = DataFlavor.getTextPlainUnicodeFlavor();
+     if (!data.isDataFlavorSupported(flavor)) {
+       return "";
+     }
+     final Reader reader = flavor.getReaderForText(data);
+     try {
+       return StringTools.readerToString(reader);
+     } finally {
+       try {
+         // the reader was opened here, so it is released here
+         reader.close();
+       } catch (final IOException ignored) {
+         // the text has been read (or the read error is already on its
+         // way to the caller); a failing close() must not replace either
+       }
+     }
+   } catch (final Exception ex) {
+     ex.printStackTrace();
+     return data.toString();
+   }
+ }
+
+ /**
+  * Quit the application, or - when running in tray mode - only hide the
+  * main window to the system tray.
+  */
+ void quitOrHide() {
+   if (!trayMode) {
+     quit();
+     return;
+   }
+   hideToTray();
+ }
+
+ /**
+  * Stop the server (if any), save the configuration, hide the main window
+  * and terminate the JVM with exit code 0. A failure to save the
+  * configuration is shown to the user but does not prevent the exit.
+  */
+ void quit() {
+ stopServer();
+ try {
+ config.saveConfiguration();
+ } catch (final IOException e) {
+ // report the problem, then continue with the shutdown
+ Tools.showError(e);
+ }
+ frame.setVisible(false);
+ System.exit(0);
+ }
+
+ /**
+  * Start the HTTP server on the configured port if the configuration asks
+  * for a server. A PortBindingException is shown in an error dialog.
+  */
+ private void maybeStartServer() {
+   if (!config.getRunServer()) {
+     return;
+   }
+   httpServer = new HTTPServer(config.getServerPort());
+   try {
+     httpServer.run();
+   } catch (final PortBindingException portInUse) {
+     JOptionPane.showMessageDialog(null, portInUse.getMessage(), "Error",
+         JOptionPane.ERROR_MESSAGE);
+   }
+ }
+
+ /**
+  * Stop the HTTP server and forget about it; does nothing if there is no
+  * server.
+  */
+ private void stopServer() {
+   if (httpServer == null) {
+     return;
+   }
+   httpServer.stop();
+   httpServer = null;
+ }
+
+ /**
+  * Get the language that is selected in the language box.
+  * The box shows translated names, so the selected name is mapped back to
+  * the key of the first message whose text equals it. If no message
+  * matches, the selected name itself is used. A result longer than two
+  * characters is looked up as a language name (external rules), anything
+  * else as a short name.
+  */
+ private Language getCurrentLanguage() {
+   final String selectedName = langBox.getSelectedItem().toString();
+   String langId = selectedName;
+   final Enumeration<String> keys = messages.getKeys();
+   while (keys.hasMoreElements()) {
+     final String key = keys.nextElement();
+     if (selectedName.equals(messages.getString(key))) {
+       langId = key;
+       break;
+     }
+   }
+   // names longer than a language code belong to external rules:
+   return langId.length() > 2
+       ? Language.getLanguageForName(langId)
+       : Language.getLanguageForShortName(langId);
+ }
+
+ /**
+  * Get the configuration dialog of the current language. The dialog is
+  * created on first use, pre-set with the values of the configuration and
+  * cached per language.
+  */
+ private ConfigurationDialog getCurrentConfigDialog() {
+   final Language language = getCurrentLanguage();
+   if (configDialogs.containsKey(language)) {
+     return configDialogs.get(language);
+   }
+   final ConfigurationDialog created = new ConfigurationDialog(frame, false);
+   created.setMotherTongue(config.getMotherTongue());
+   created.setDisabledRules(config.getDisabledRuleIds());
+   created.setEnabledRules(config.getEnabledRuleIds());
+   created.setDisabledCategories(config.getDisabledCategoryNames());
+   created.setRunServer(config.getRunServer());
+   created.setServerPort(config.getServerPort());
+   configDialogs.put(language, created);
+   return created;
+ }
+
+ /**
+  * Create a new JLanguageTool for the current language and apply the
+  * settings of that language's configuration dialog: mother tongue,
+  * disabled rules, disabled categories and enabled rules.
+  *
+  * @throws RuntimeException wrapping any IOException,
+  *   ParserConfigurationException or SAXException raised during setup
+  */
+ private JLanguageTool getCurrentLanguageTool() {
+   try {
+     final ConfigurationDialog dialog = getCurrentConfigDialog();
+     final JLanguageTool tool = new JLanguageTool(getCurrentLanguage(),
+         dialog.getMotherTongue());
+     tool.activateDefaultPatternRules();
+     tool.activateDefaultFalseFriendRules();
+     // each of the three sets may be null, which means "nothing to do"
+     final Set<String> offRules = dialog.getDisabledRuleIds();
+     if (offRules != null) {
+       for (final String id : offRules) {
+         tool.disableRule(id);
+       }
+     }
+     final Set<String> offCategories = dialog.getDisabledCategoryNames();
+     if (offCategories != null) {
+       for (final String category : offCategories) {
+         tool.disableCategory(category);
+       }
+     }
+     final Set<String> onRules = dialog.getEnabledRuleIds();
+     if (onRules != null) {
+       for (final String id : onRules) {
+         tool.enableDefaultOffRule(id);
+         tool.enableRule(id);
+       }
+     }
+     return tool;
+   } catch (final IOException ioe) {
+     throw new RuntimeException(ioe);
+   } catch (final ParserConfigurationException pce) {
+     throw new RuntimeException(pce);
+   } catch (final SAXException se) {
+     throw new RuntimeException(se);
+   }
+ }
+
+ /**
+  * Check the text of the text area with the given tool and show the result
+  * as HTML in the result area. If the text area is empty (ignoring
+  * whitespace), a hint text is put into it instead.
+  * An exception during the check is printed to STDERR and shown, with its
+  * stack trace, as part of the result.
+  *
+  * @param langTool the tool used for checking
+  * @param lang the language whose translated name is shown in the
+  *   "start checking" message
+  */
+ private void checkTextAndDisplayResults(final JLanguageTool langTool,
+     final Language lang) {
+   if (StringTools.isEmpty(textArea.getText().trim())) {
+     textArea.setText(messages.getString("enterText2"));
+     return;
+   }
+   final String startCheckText = Tools.makeTexti18n(messages,
+       "startChecking", new Object[] { lang.getTranslatedName(messages) });
+   resultArea.setText(HTML_FONT_START + startCheckText + "<br>\n"
+       + HTML_FONT_END);
+   resultArea.repaint(); // FIXME: why doesn't this work?
+   // TODO: resultArea.setCursor(new Cursor(Cursor.WAIT_CURSOR));
+   final StringBuilder sb = new StringBuilder();
+   sb.append(startCheckText);
+   sb.append("...<br>\n");
+   int matches = 0;
+   try {
+     matches = checkText(langTool, textArea.getText(), sb);
+   } catch (final Exception ex) {
+     // The result is rendered as HTML, so the exception text must be
+     // escaped: frames like "<init>" or messages quoting markup would
+     // otherwise be interpreted as tags and vanish from the output.
+     sb.append("<br><br><b><font color=\"red\">");
+     sb.append(StringTools.escapeHTML(ex.toString()));
+     sb.append("<br>");
+     for (final StackTraceElement element : ex.getStackTrace()) {
+       sb.append(StringTools.escapeHTML(element.toString()));
+       sb.append("<br>");
+     }
+     sb.append("</font></b><br>");
+     ex.printStackTrace();
+   }
+   final String checkDone = Tools.makeTexti18n(messages, "checkDone",
+       new Object[] {matches});
+   sb.append(checkDone);
+   sb.append("<br>\n");
+   resultArea.setText(HTML_FONT_START + sb.toString() + HTML_FONT_END);
+   resultArea.setCaretPosition(0);
+ }
+
+ /**
+  * Check the text and append an HTML report of all matches, followed by
+  * the timing message, to the given buffer.
+  *
+  * @param langTool the tool used for checking
+  * @param text the text to check
+  * @param sb the buffer the report is appended to
+  * @return the number of matches
+  */
+ private int checkText(final JLanguageTool langTool, final String text,
+     final StringBuilder sb) throws IOException {
+   final long checkStarted = System.currentTimeMillis();
+   final List<RuleMatch> ruleMatches = langTool.check(text);
+   final long reportStarted = System.currentTimeMillis();
+   int number = 1; // matches are numbered from 1 in the report
+   for (final RuleMatch match : ruleMatches) {
+     sb.append(Tools.makeTexti18n(messages, "result1",
+         new Object[] {number, match.getLine() + 1, match.getColumn()}));
+     // the message's own markup is shown as bold text
+     final String msg = match.getMessage()
+         .replace("<suggestion>", "<b>")
+         .replace("</suggestion>", "</b>")
+         .replace("<old>", "<b>")
+         .replace("</old>", "</b>");
+     sb.append("<b>" + messages.getString("errorMessage") + "</b> " + msg + "<br>\n");
+     if (!match.getSuggestedReplacements().isEmpty()) {
+       final String replacements = StringTools.listToString(
+           match.getSuggestedReplacements(), "; ");
+       sb.append("<b>" + messages.getString("correctionMessage") + "</b> "
+           + replacements + "<br>\n");
+     }
+     final String context = Tools.getContext(match.getFromPos(),
+         match.getToPos(), text);
+     sb.append("<b>" + messages.getString("errorContext") + "</b> " + context);
+     sb.append("<br>\n");
+     number++;
+   }
+   final long finished = System.currentTimeMillis();
+   // first value: check plus report, second value: building the report only
+   sb.append(Tools.makeTexti18n(messages, "resultTime", new Object[] {
+       finished - checkStarted,
+       finished - reportStarted}));
+   return ruleMatches.size();
+ }
+
+ // Set the tray mode flag. The flag is read by quitOrHide(), which hides
+ // the window to the tray instead of quitting when it is set.
+ private void setTrayMode(boolean trayMode) {
+ this.trayMode = trayMode;
+ }
+
+ /**
+  * Start the GUI. Without arguments the main window is shown; with the
+  * single argument <code>-t</code> or <code>--tray</code> the program
+  * starts hidden in the system tray. Any other arguments print a usage
+  * message.
+  */
+ public static void main(final String[] args) {
+   try {
+     final Main prg = new Main();
+     final boolean startInTray = args.length == 1
+         && (args[0].equals("-t") || args[0].equals("--tray"));
+     if (startInTray) {
+       // dock to systray on startup
+       javax.swing.SwingUtilities.invokeLater(new Runnable() {
+         public void run() {
+           try {
+             prg.createGUI();
+             prg.setTrayMode(true);
+             prg.hideToTray();
+           } catch (final MissingJdicException e) {
+             JOptionPane.showMessageDialog(null, e.getMessage(), "Error",
+                 JOptionPane.ERROR_MESSAGE);
+             System.exit(1);
+           } catch (final Exception e) {
+             Tools.showError(e);
+             System.exit(1);
+           }
+         }
+       });
+     } else if (args.length == 0) {
+       javax.swing.SwingUtilities.invokeLater(new Runnable() {
+         public void run() {
+           try {
+             prg.createGUI();
+             prg.showGUI();
+           } catch (final Exception e) {
+             Tools.showError(e);
+           }
+         }
+       });
+     } else {
+       // anything else is not understood
+       System.out
+           .println("Usage: java de.danielnaber.languagetool.gui.Main [-t|--tray]");
+       System.out
+           .println(" -t, --tray: dock LanguageTool to system tray on startup");
+     }
+   } catch (final Exception e) {
+     Tools.showError(e);
+   }
+ }
+
+ //
+ // The System Tray stuff
+ //
+
+ /**
+  * Listener for the entries of the tray icon's popup menu. The entries
+  * are told apart by comparing the action command with the menu labels,
+  * ignoring case.
+  */
+ class TrayActionRMBListener implements ActionListener {
+
+   public void actionPerformed(ActionEvent e) {
+     final String command = e.getActionCommand();
+     if (isLabelOf(command, "guiMenuCheckClipboard")) {
+       restoreFromTrayAndCheck();
+     } else if (isLabelOf(command, "guiMenuShowMainWindow")) {
+       restoreFromTray();
+     } else if (isLabelOf(command, "guiMenuQuit")) {
+       quit();
+     } else {
+       JOptionPane.showMessageDialog(null, "Unknown action: "
+           + command, "Error", JOptionPane.ERROR_MESSAGE);
+     }
+   }
+
+   // true if the command equals the label of the given message, ignoring case
+   private boolean isLabelOf(final String command, final String messageKey) {
+     return command.equalsIgnoreCase(
+         StringTools.getLabel(messages.getString(messageKey)));
+   }
+
+ }
+
+ /**
+  * Listener for clicks on the tray icon; it is registered as an action
+  * listener on the Java 1.5 tray icon and as a mouse listener on the
+  * java.awt one. A click hides the window if it is visible and active;
+  * otherwise the window is restored and the clipboard text is checked.
+  */
+ class TrayActionListener implements ActionListener, MouseListener {
+
+   // for Java 1.5 / Jdic:
+   public void actionPerformed(@SuppressWarnings("unused")ActionEvent e) {
+     handleClick();
+   }
+
+   // Java 1.6:
+   public void mouseClicked(@SuppressWarnings("unused")MouseEvent e) {
+     handleClick();
+   }
+
+   private void handleClick() {
+     final boolean windowShown = frame.isVisible();
+     if (windowShown && frame.isActive()) {
+       frame.setVisible(false);
+       return;
+     }
+     if (windowShown) {
+       // visible, but not the active window: raise it first
+       frame.toFront();
+     }
+     restoreFromTrayAndCheck();
+   }
+
+   // the remaining mouse events are of no interest:
+
+   public void mousePressed(@SuppressWarnings("unused")MouseEvent e) {
+   }
+
+   public void mouseReleased(@SuppressWarnings("unused")MouseEvent e) {
+   }
+
+   public void mouseEntered(@SuppressWarnings("unused") MouseEvent e) {
+   }
+
+   public void mouseExited(@SuppressWarnings("unused")MouseEvent e) {
+   }
+
+ }
+
+ /**
+  * Window listener that quits the application, or hides it to the tray
+  * when in tray mode, as soon as the user closes the window. All other
+  * window events are ignored.
+  */
+ class CloseListener implements WindowListener {
+
+   // the only event that is handled:
+   public void windowClosing(@SuppressWarnings("unused")WindowEvent e) {
+     quitOrHide();
+   }
+
+   // ignored events, in life cycle order:
+
+   public void windowOpened(@SuppressWarnings("unused")WindowEvent e) {
+   }
+
+   public void windowActivated(@SuppressWarnings("unused")WindowEvent e) {
+   }
+
+   public void windowDeactivated(@SuppressWarnings("unused")WindowEvent e) {
+   }
+
+   public void windowIconified(@SuppressWarnings("unused")WindowEvent e) {
+   }
+
+   public void windowDeiconified(@SuppressWarnings("unused")WindowEvent e) {
+   }
+
+   public void windowClosed(@SuppressWarnings("unused")WindowEvent e) {
+   }
+
+ }
+
+ /**
+  * File filter for the "open" dialog: accepts files whose name ends in
+  * ".txt" (ignoring case) and all directories.
+  */
+ static class PlainTextFilter extends FileFilter {
+
+   @Override
+   public boolean accept(final File f) {
+     // Directories must be accepted, too: the file chooser does not
+     // display rejected entries, so the user could not browse to other
+     // folders otherwise.
+     return f.isDirectory() || f.getName().toLowerCase().endsWith(".txt");
+   }
+
+   @Override
+   public String getDescription() {
+     return "*.txt";
+   }
+
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/MainMenuBar.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/MainMenuBar.java
new file mode 100644
index 0000000..72e3191
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/MainMenuBar.java
@@ -0,0 +1,170 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+import java.awt.Event;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.KeyEvent;
+import java.util.ResourceBundle;
+
+import javax.swing.JMenu;
+import javax.swing.JMenuBar;
+import javax.swing.JMenuItem;
+import javax.swing.JOptionPane;
+import javax.swing.KeyStroke;
+
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * The menu bar of the main dialog.
+ *
+ * @author Daniel Naber
+ */
+class MainMenuBar extends JMenuBar implements ActionListener {
+
+ private static final long serialVersionUID = -7160998682243081767L;
+
+ private final ResourceBundle messages;
+
+ // File:
+ private String openText;
+ private String checkClipboardText;
+ private String dockToTrayText;
+ private String addLanguageText;
+ private String optionsText;
+ private String quitText;
+ // Help:
+ private String aboutText;
+
+ private final Main prg;
+ private JMenu fileMenu;
+ private JMenu helpMenu;
+
+ MainMenuBar(Main prg, ResourceBundle messages) {
+ this.prg = prg;
+ this.messages = messages;
+ initStrings();
+ fileMenu.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuFile")));
+ helpMenu.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuHelp")));
+ // "Open":
+ final JMenuItem openItem = new JMenuItem(openText);
+ openItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_O, Event.CTRL_MASK));
+ openItem.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuOpen")));
+ openItem.addActionListener(this);
+ fileMenu.add(openItem);
+ // "Check Text in Clipboard":
+ final JMenuItem checkClipboardItem = new JMenuItem(checkClipboardText);
+ checkClipboardItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_Y, Event.CTRL_MASK));
+ checkClipboardItem.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuCheckClipboard")));
+ checkClipboardItem.addActionListener(this);
+ fileMenu.add(checkClipboardItem);
+ // "Hide to System Tray":
+ final JMenuItem dockToTrayItem = new JMenuItem(dockToTrayText);
+ dockToTrayItem.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuHide")));
+ dockToTrayItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_D, Event.CTRL_MASK));
+ dockToTrayItem.addActionListener(this);
+ fileMenu.add(dockToTrayItem);
+ // "Add Language":
+ final JMenuItem addLanguageItem = new JMenuItem(addLanguageText);
+ addLanguageItem.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuAddRules")));
+ addLanguageItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_A, Event.CTRL_MASK));
+ addLanguageItem.addActionListener(this);
+ fileMenu.add(addLanguageItem);
+ // "Options":
+ final JMenuItem optionsItem = new JMenuItem(optionsText);
+ optionsItem.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuOptions")));
+ optionsItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_S, Event.CTRL_MASK));
+ optionsItem.addActionListener(this);
+ fileMenu.add(optionsItem);
+ // "Quit":
+ final JMenuItem quitItem = new JMenuItem(quitText);
+ quitItem.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuQuit")));
+ quitItem.setAccelerator(KeyStroke.getKeyStroke(KeyEvent.VK_Q, Event.CTRL_MASK));
+ quitItem.addActionListener(this);
+ fileMenu.add(quitItem);
+ // "About":
+ final JMenuItem helpItem = new JMenuItem(aboutText);
+ helpItem.addActionListener(this);
+ helpItem.setMnemonic(StringTools.getMnemonic(
+ messages.getString("guiMenuAbout")));
+ helpMenu.add(helpItem);
+ // add menus:
+ add(fileMenu);
+ add(helpMenu);
+ }
+
+ private void initStrings() {
+ fileMenu = new JMenu(StringTools.getLabel(
+ messages.getString("guiMenuFile")));
+ helpMenu = new JMenu(StringTools.getLabel(
+ messages.getString("guiMenuHelp")));
+ // File:
+ openText = StringTools.getLabel(
+ messages.getString("guiMenuOpen"));
+ checkClipboardText = StringTools.getLabel(
+ messages.getString("guiMenuCheckClipboard"));
+ dockToTrayText = StringTools.getLabel(
+ messages.getString("guiMenuHide"));
+ addLanguageText = StringTools.getLabel(
+ messages.getString("guiMenuAddRules"));
+ optionsText = StringTools.getLabel(
+ messages.getString("guiMenuOptions"));
+ quitText = StringTools.getLabel(
+ messages.getString("guiMenuQuit"));
+ // Help:
+ aboutText = StringTools.getLabel(
+ messages.getString("guiMenuAbout"));
+ }
+
+ public void actionPerformed(ActionEvent e) {
+ if (e.getActionCommand().equals(openText)) {
+ prg.loadFile();
+ } else if (e.getActionCommand().equals(checkClipboardText)) {
+ prg.checkClipboardText();
+ } else if (e.getActionCommand().equals(dockToTrayText)) {
+ try {
+ prg.hideToTray();
+ } catch (MissingJdicException ex) {
+ JOptionPane.showMessageDialog(null, ex.getMessage(), "Error",
+ JOptionPane.ERROR_MESSAGE);
+ }
+ } else if (e.getActionCommand().equals(addLanguageText)) {
+ prg.addLanguage();
+ } else if (e.getActionCommand().equals(optionsText)) {
+ prg.showOptions();
+ } else if (e.getActionCommand().equals(quitText)) {
+ prg.quit();
+ } else if (e.getActionCommand().equals(aboutText)) {
+ final AboutDialog about = new AboutDialog(messages);
+ about.show();
+ } else {
+ throw new IllegalArgumentException("Unknown action " + e);
+ }
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/MissingJdicException.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/MissingJdicException.java
new file mode 100644
index 0000000..6dcf5de
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/MissingJdicException.java
@@ -0,0 +1,38 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+/**
+ * Exception thrown with Java 1.5 if the jdic library cannot be found.
+ *
+ * @author Daniel Naber
+ */
+public class MissingJdicException extends RuntimeException {
+
+ /**
+ * Version id for serialization.
+ */
+ private static final long serialVersionUID = 8822404582351420654L;
+
+ /**
+ * Create the exception with a fixed message that tells the user to unzip
+ * 'standalone-libs.zip' into the installation directory.
+ *
+ * @param throwable the original error, kept as the cause
+ */
+ public MissingJdicException(Throwable throwable) {
+ super("TrayIcon class not found. Please unzip " +
+ "'standalone-libs.zip' in your LanguageTool installation directory.", throwable);
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Tools.java b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Tools.java
new file mode 100644
index 0000000..5abe803
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/gui/Tools.java
@@ -0,0 +1,192 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.gui;
+
+import java.awt.Frame;
+import java.io.File;
+import java.text.MessageFormat;
+import java.util.ResourceBundle;
+
+import javax.swing.JFileChooser;
+import javax.swing.JOptionPane;
+import javax.swing.filechooser.FileFilter;
+
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * GUI-related tools.
+ *
+ * @author Daniel Naber
+ */
+public class Tools {
+
+ private static final int DEFAULT_CONTEXT_SIZE = 40; // characters
+ private static final String MARKER_START = "<b><font color=\"red\">";
+ private static final String MARKER_END = "</font></b>";
+
+ private Tools() {
+ // no constructor
+ }
+
+ public static String makeTexti18n(final ResourceBundle messages, final String key,
+ final Object[] messageArguments) {
+ final MessageFormat formatter = new MessageFormat("");
+ formatter.applyPattern(messages.getString(key));
+ return formatter.format(messageArguments);
+ }
+
+ /**
+ * Get the default context (40 characters) of the given text range,
+ * highlighting the range with HTML.
+ */
+ public static String getContext(final int fromPos, final int toPos, final String text) {
+ return getContext(fromPos, toPos, text, DEFAULT_CONTEXT_SIZE);
+ }
+
+ /**
+ * Get the context (<code>contextSize</code> characters) of the given text
+ * range, highlighting the range with HTML code.
+ */
+ public static String getContext(final int fromPos, final int toPos, final String fileContents,
+ int contextSize) {
+ return getContext(fromPos, toPos, fileContents, contextSize, MARKER_START,
+ MARKER_END, true);
+ }
+
+ /**
+ * Get the context (<code>contextSize</code> characters) of the given text
+ * range, highlighting the range with the given marker strings, not escaping
+ * HTML.
+ */
+ public static String getContext(final int fromPos, final int toPos,
+ final String fileContents, final int contextSize,
+ final String markerStart, final String markerEnd) {
+ return getContext(fromPos, toPos, fileContents, contextSize, markerStart,
+ markerEnd, false);
+ }
+ /**
+ * Get the context (<code>contextSize</code> characters) of the given text
+ * range, highlighting the range with the given marker strings.
+ *
+ * @param fromPos
+ * the start position of the error in characters
+ * @param toPos
+ * the end position of the error in characters
+ * @param text
+ * the text from which the context should be taken
+ * @param contextSize
+ * the size of the context in characters
+ * @param markerStart
+ * the string used to mark the beginning of the error
+ * @param markerEnd
+ * the string used to mark the end of the error
+ * @param escapeHTML
+ * whether HTML/XML characters should be escaped
+ */
+ public static String getContext(final int fromPos, final int toPos,
+ String text, final int contextSize, final String markerStart,
+ final String markerEnd, final boolean escapeHTML) {
+ text = text.replace('\n', ' ');
+ // calculate context region:
+ int startContent = fromPos - contextSize;
+ String prefix = "...";
+ String postfix = "...";
+ String markerPrefix = " ";
+ if (startContent < 0) {
+ prefix = "";
+ markerPrefix = "";
+ startContent = 0;
+ }
+ int endContent = toPos + contextSize;
+ final int fileLen = text.length();
+ if (endContent > fileLen) {
+ postfix = "";
+ endContent = fileLen;
+ }
+ // make "^" marker. inefficient but robust implementation:
+ final StringBuilder marker = new StringBuilder();
+ final int totalLen = fileLen + prefix.length();
+ for (int i = 0; i < totalLen; i++) {
+ if (i >= fromPos && i < toPos) {
+ marker.append('^');
+ } else {
+ marker.append(' ');
+ }
+ }
+ // now build context string plus marker:
+ final StringBuilder sb = new StringBuilder();
+ sb.append(prefix);
+ sb.append(text.substring(startContent, endContent));
+ final String markerStr = markerPrefix
+ + marker.substring(startContent, endContent);
+ sb.append(postfix);
+ final int startMark = markerStr.indexOf('^');
+ final int endMark = markerStr.lastIndexOf('^');
+ String result = sb.toString();
+ if (escapeHTML) {
+ result = StringTools.escapeHTML(result.substring(0, startMark))
+ + markerStart
+ + StringTools.escapeHTML(result.substring(startMark, endMark + 1))
+ + markerEnd + StringTools.escapeHTML(result.substring(endMark + 1));
+ } else {
+ result = result.substring(0, startMark) + markerStart
+ + result.substring(startMark, endMark + 1) + markerEnd
+ + result.substring(endMark + 1);
+ }
+ return result;
+ }
+
+ /**
+ * Show a file chooser dialog and return the file selected by the user or
+ * <code>null</code>.
+ */
+ static File openFileDialog(final Frame frame, final FileFilter fileFilter) {
+ final JFileChooser jfc = new JFileChooser();
+ jfc.setFileFilter(fileFilter);
+ jfc.showOpenDialog(frame);
+ final File file = jfc.getSelectedFile();
+ if (file == null) {
+ return null;
+ }
+ return file;
+ }
+
+ /**
+ * Show the exception (with stacktrace) in a dialog and print it to STDERR.
+ */
+ static void showError(final Exception e) {
+ final String msg = de.danielnaber.languagetool.tools.Tools
+ .getFullStackTrace(e);
+ JOptionPane
+ .showMessageDialog(null, msg, "Error", JOptionPane.ERROR_MESSAGE);
+ e.printStackTrace();
+ }
+
+ /**
+ * Show the exception (message without stacktrace) in a dialog and print it to
+ * STDERR.
+ */
+ static void showErrorMessage(final Exception e) {
+ final String msg = e.getMessage();
+ JOptionPane
+ .showMessageDialog(null, msg, "Error", JOptionPane.ERROR_MESSAGE);
+ e.printStackTrace();
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java
new file mode 100644
index 0000000..fb1df60
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Belarusian.java
@@ -0,0 +1,72 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.be.BelarusianTagger;
+
+/**
+ * Belarusian language declarations.
+ *
+ * Copyright (C) 2010 Alex Buloichik (alex73mail@gmail.com)
+ */
+public class Belarusian extends Language {
+
+  private static final String[] COUNTRIES = { "BY" };
+
+  private final Tagger tagger = new BelarusianTagger();
+
+  /** The English name of the language. */
+  public String getName() {
+    return "Belarusian";
+  }
+
+  /** The short name (language code) of the language. */
+  public String getShortName() {
+    return "be";
+  }
+
+  /** A new locale for the short name, without a country. */
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  /** The country codes of the language's variants. */
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  /** The tagger instance of this language object. */
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  /** The maintainers of the Belarusian support. */
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Alex Buloichik");
+    return new Contributor[] { maintainer };
+  }
+
+  /** The ids of the rules that are used for Belarusian, as a new set. */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] ruleIds = {
+        "COMMA_PARENTHESIS_WHITESPACE",
+        "DOUBLE_PUNCTUATION",
+        "UPPERCASE_SENTENCE_START",
+        "WHITESPACE_RULE" };
+    final Set<String> relevant = new HashSet<String>();
+    for (final String ruleId : ruleIds) {
+      relevant.add(ruleId);
+    }
+    return relevant;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java
new file mode 100644
index 0000000..77d79ae
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Bokmal.java
@@ -0,0 +1,104 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+/*import de.danielnaber.languagetool.synthesis.en.EnglishSynthesizer; */
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+/*import de.danielnaber.languagetool.tagging.disambiguation.rules.en.EnglishRuleDisambiguator;*/
+/*import de.danielnaber.languagetool.tagging.en.EnglishTagger;*/
+import de.danielnaber.languagetool.tagging.nb.BokmalTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+/*import de.danielnaber.languagetool.tokenizers.en.EnglishWordTokenizer;*/
+
+/**
+ * Norwegian Bokmal language declarations.
+ *
+ * Only a tagger and an SRX-based sentence tokenizer are configured here; no
+ * word tokenizer, synthesizer or disambiguator is declared by this class.
+ * NOTE(review): the "nb" SRX rules are assumed to exist - confirm.
+ */
+public class Bokmal extends Language {
+
+  private static final String[] COUNTRIES = {"NO"};
+
+  private final Tagger tagger = new BokmalTagger();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("nb");
+
+  /** Builds the locale from the code returned by {@link #getShortName()}. */
+  public final Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  public final String getName() {
+    return "Bokmal";
+  }
+
+  public final String getShortName() {
+    return "nb";
+  }
+
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public final Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Arno Teigseth")};
+  }
+
+  /**
+   * Returns a new, mutable set with the IDs of the rules relevant for this
+   * language.
+   *
+   * <p>The bracket check is registered under the generic
+   * {@code UNPAIRED_BRACKETS} ID rather than the English-specific
+   * {@code EN_UNPAIRED_BRACKETS} one.
+   *
+   * @return the rule IDs; never {@code null}
+   */
+  public final Set<String> getRelevantRuleIDs() {
+    final Set<String> ids = new HashSet<String>();
+    ids.add("COMMA_PARENTHESIS_WHITESPACE");
+    ids.add("DOUBLE_PUNCTUATION");
+    // Generic bracket rule: the EN_-prefixed ID is the one English uses, while
+    // the other non-English languages (e.g. Danish, Dutch, French) use this ID.
+    ids.add("UNPAIRED_BRACKETS");
+    ids.add("UPPERCASE_SENTENCE_START");
+    ids.add("WORD_REPEAT_RULE");
+    ids.add("WHITESPACE_RULE");
+    return ids;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java
new file mode 100644
index 0000000..4e0eb67
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Catalan.java
@@ -0,0 +1,91 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.ca.CatalanSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.ca.CatalanTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.rules.ca.CastellanismesReplaceRule;
+import de.danielnaber.languagetool.rules.ca.AccentuacioReplaceRule;
+
+public class Catalan extends Language {
+  private static final String[] COUNTRIES = {"ES"};
+
+  private final Tagger tagger = new CatalanTagger();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ca");
+  private final Synthesizer synthesizer = new CatalanSynthesizer();
+
+  public String getName() {
+    return "Catalan";
+  }
+
+  public String getShortName() {
+    return "ca";
+  }
+
+  /** Builds the locale from the code returned by {@link #getShortName()}. */
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  public final Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Ricard Roca")};
+  }
+
+  /**
+   * Returns a new, mutable set of relevant rule IDs, ending with the IDs
+   * published by the two Catalan replace rules.
+   */
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WHITESPACE_RULE");
+    ruleIds.add(CastellanismesReplaceRule.CATALAN_CASTELLANISMES_REPLACE_RULE);
+    ruleIds.add(AccentuacioReplaceRule.CATALAN_ACCENTUACIO_REPLACE_RULE);
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java
new file mode 100644
index 0000000..e38d635
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Contributor.java
@@ -0,0 +1,63 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+/**
+ * A person that contributed rules or code to LanguageTool.
+ *
+ * @author Daniel Naber
+ */
+public class Contributor {
+  private final String name;
+  private String remark;
+  private String url;
+
+  /** Creates a contributor; a {@code null} name is rejected with a NullPointerException. */
+  Contributor(String name) {
+    if (name == null) {
+      throw new NullPointerException("name cannot be null");
+    }
+    this.name = name;
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public String getRemark() {
+    return remark;
+  }
+
+  public void setRemark(final String remark) {
+    this.remark = remark;
+  }
+
+  public String getUrl() {
+    return url;
+  }
+
+  public void setUrl(final String url) {
+    this.url = url;
+  }
+
+  /** The string form is whatever {@link #getName()} returns. */
+  public final String toString() {
+    return getName();
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java
new file mode 100644
index 0000000..d3154d7
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Czech.java
@@ -0,0 +1,73 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.cs.CzechTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.cs.CzechSentenceTokenizer;
+
+public class Czech extends Language {
+  private static final String[] COUNTRIES = {"CZ"};
+
+  private final Tagger tagger = new CzechTagger();
+  private final SentenceTokenizer sentenceTokenizer = new CzechSentenceTokenizer();
+
+  public String getName() {
+    return "Czech";
+  }
+
+  public String getShortName() {
+    return "cs";
+  }
+
+  /** Builds the locale from the code returned by {@link #getShortName()}. */
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Jozef Ličko")};
+  }
+
+  /** Returns a new, mutable set with the two relevant rule IDs. */
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java
new file mode 100644
index 0000000..d114c40
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Danish.java
@@ -0,0 +1,78 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.da.DanishTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+public class Danish extends Language {
+  private static final String[] COUNTRIES = {"DK"};
+  private final Tagger tagger = new DanishTagger();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("da");
+
+  public final String getName() {
+    return "Danish";
+  }
+
+  public final String getShortName() {
+    return "da";
+  }
+
+  /** Builds the locale from the code returned by {@link #getShortName()}. */
+  public final Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  public final Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Esben Aaberg")};
+  }
+
+  /** Returns a new, mutable set holding the relevant rule IDs. */
+  public final Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    // Genitive apostrophes (e.g. "Lis' hund") are corrected in UnpairedQuotesBracketsRule.
+    ruleIds.add("UNPAIRED_BRACKETS");
+    // Abbreviation exceptions: noted as done in DanishSentenceTokenizer - TODO confirm, SRX is used here.
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    // "WORD_REPEAT_RULE" is not added here: it is implemented in grammar.xml.
+    ruleIds.add("WHITESPACE_RULE");
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java
new file mode 100644
index 0000000..ab4284b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Demo.java
@@ -0,0 +1,60 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.xx.DemoTagger;
+
+public class Demo extends Language {
+  private final Tagger tagger = new DemoTagger();
+
+  public String getName() {
+    return "Testlanguage";
+  }
+
+  public String getShortName() {
+    return "xx";
+  }
+
+  public Locale getLocale() {
+    return new Locale("en"); // fixed value, not derived from the "xx" short name
+  }
+
+  public String[] getCountryVariants() {
+    return new String[] {"XX"};
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  /** Always {@code null}: this class declares no maintainers. */
+  public Contributor[] getMaintainers() {
+    return null;
+  }
+
+  /** Always {@code null}: this class declares no rule IDs. */
+  public Set<String> getRelevantRuleIDs() {
+    return null;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java
new file mode 100644
index 0000000..0670736
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Dutch.java
@@ -0,0 +1,99 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.nl.DutchSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.nl.DutchRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.nl.DutchTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.nl.DutchWordTokenizer;
+
+public class Dutch extends Language {
+  private static final String[] COUNTRIES = { "NL", "BE" };
+
+  private final Tagger tagger = new DutchTagger();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("nl");
+  private final Synthesizer synthesizer = new DutchSynthesizer();
+  private final Disambiguator disambiguator = new DutchRuleDisambiguator();
+  private final Tokenizer wordTokenizer = new DutchWordTokenizer();
+
+  public final String getName() {
+    return "Dutch";
+  }
+
+  public final String getShortName() {
+    return "nl";
+  }
+
+  /** Builds the locale from the code returned by {@link #getShortName()}. */
+  public final Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wordTokenizer;
+  }
+
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  public final Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  public final Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  public final Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Ruud Baars");
+    maintainer.setUrl("http://www.opentaal.org");
+    return new Contributor[] { maintainer };
+  }
+
+  /** Returns a new, mutable set holding the relevant rule IDs. */
+  public final Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WHITESPACE_RULE");
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java
new file mode 100644
index 0000000..0bf16e8
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/English.java
@@ -0,0 +1,103 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.en.EnglishSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.en.EnglishRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.en.EnglishTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.en.EnglishWordTokenizer;
+
+public class English extends Language {
+  /** Country codes handed out by {@link #getCountryVariants()}. */
+  private static final String[] COUNTRIES = {"GB", "US", "AU", "CA", "NZ", "ZA" };
+
+  private final Tagger tagger = new EnglishTagger();
+  private final Tokenizer wordTokenizer = new EnglishWordTokenizer();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("en");
+  private final Synthesizer synthesizer = new EnglishSynthesizer();
+  private final Disambiguator disambiguator = new EnglishRuleDisambiguator();
+
+  public final String getName() {
+    return "English";
+  }
+
+  public final String getShortName() {
+    return "en";
+  }
+
+  /** Builds the locale from the code returned by {@link #getShortName()}. */
+  public final Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wordTokenizer;
+  }
+
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  public final Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  public final Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  public final Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Marcin Miłkowski"),
+      new Contributor("Daniel Naber")};
+  }
+
+  /** Returns a new, mutable set holding the relevant rule IDs. */
+  public final Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("EN_UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    // IDs carrying the EN_ prefix, used for English only:
+    ruleIds.add("EN_A_VS_AN");
+    ruleIds.add("EN_COMPOUNDS");
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java
new file mode 100644
index 0000000..0e48d98
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Esperanto.java
@@ -0,0 +1,72 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.eo.EsperantoTagger;
+
+public class Esperanto extends Language {
+  private final Tagger tagger = new EsperantoTagger();
+
+  public String getName() {
+    return "Esperanto";
+  }
+
+  public String getShortName() {
+    return "eo";
+  }
+
+  public Locale getLocale() {
+    return new Locale("eo");
+  }
+
+  public String[] getCountryVariants() {
+    // "ANY" serves as a country-less placeholder country code for OOo.
+    return new String[] {"ANY"};
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Dominique Pellé")};
+  }
+
+  /**
+   * Returns a new, mutable set holding the relevant rule IDs.
+   */
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    // NOTE(review): same ID as in French - confirm it is intended for Esperanto.
+    ruleIds.add("FRENCH_WHITESPACE");
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java
new file mode 100644
index 0000000..96dc5fc
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/French.java
@@ -0,0 +1,90 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.Unifier;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.fr.FrenchRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.fr.FrenchTagger;
+
+/** French language declarations. */
+public class French extends Language {
+  private static final Unifier FRENCH_UNIFIER = new Unifier();
+  // "HT" is the ISO 3166-1 alpha-2 code for Haiti ("HI" is not an assigned code).
+  // NOTE(review): the empty entry presumably means "no country" - confirm.
+  private static final String[] COUNTRIES = {"FR", "", "BE", "CH", "CA",
+    "LU", "MC", "CM", "CI", "HT", "ML", "SN", "CD", "MA", "RE"
+  };
+
+  private final Tagger tagger = new FrenchTagger();
+  private final Disambiguator disambiguator = new FrenchRuleDisambiguator();
+
+  public Locale getLocale() {
+    return new Locale(getShortName()); // locale follows the short code
+  }
+
+  public String getName() {
+    return "French";
+  }
+
+  public String getShortName() {
+    return "fr";
+  }
+
+  /** Returns the shared country-code array itself, not a copy. */
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  public Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  public Unifier getUnifier() {
+    return FRENCH_UNIFIER;
+  }
+
+  public Contributor[] getMaintainers() {
+    final Contributor hVoisard = new Contributor("Hugo Voisard");
+    hVoisard.setRemark("2006-2007");
+    return new Contributor[] {new Contributor("Agnes Souque"), hVoisard};
+  }
+
+  /** Returns a new, mutable set holding the relevant rule IDs. */
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ids = new HashSet<String>();
+    ids.add("COMMA_PARENTHESIS_WHITESPACE");
+    ids.add("DOUBLE_PUNCTUATION");
+    ids.add("UNPAIRED_BRACKETS");
+    ids.add("UPPERCASE_SENTENCE_START");
+    ids.add("WHITESPACE_RULE");
+    ids.add("FRENCH_WHITESPACE");
+    return ids;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java
new file mode 100644
index 0000000..abd2158
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Galician.java
@@ -0,0 +1,86 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.gl.GalicianTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.gl.GalicianWordTokenizer;
+
+public class Galician extends Language {
+  private static final String[] COUNTRIES = {"ES"};
+
+  private final Tagger tagger = new GalicianTagger();
+  private final Tokenizer wordTokenizer = new GalicianWordTokenizer();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("gl");
+
+  public final String getName() {
+    return "Galician";
+  }
+
+  public final String getShortName() {
+    return "gl";
+  }
+
+  /** Builds the locale from the code returned by {@link #getShortName()}. */
+  public final Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wordTokenizer;
+  }
+
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Susana Sotelo Docío");
+    maintainer.setUrl("http://www.g11n.net/languagetool-gl");
+    return new Contributor[] { maintainer };
+  }
+
+  /** Returns a new, mutable set holding the relevant rule IDs. */
+  public final Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    return ruleIds;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java
new file mode 100644
index 0000000..2df4cd4
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/German.java
@@ -0,0 +1,87 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.de.GermanTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+public class German extends Language {
+
+  // Country codes handed out by getCountryVariants().
+  private static final String[] COUNTRIES = {"DE", "CH", "AT", "LU", "LI", "BE"};
+
+  // Created once per instance and returned by the getters below.
+  private final Tagger posTagger = new GermanTagger();
+  private final SentenceTokenizer sentenceSplitter = new SRXSentenceTokenizer("de");
+
+  public String getName() {
+    return "German";
+  }
+
+  public String getShortName() {
+    return "de";
+  }
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return posTagger;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceSplitter;
+  }
+
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Daniel Naber")};
+  }
+
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("GERMAN_WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    // rules that exist only for German:
+    ruleIds.add("DE_AGREEMENT");
+    ruleIds.add("DE_CASE");
+    ruleIds.add("DE_COMPOUNDS");
+    ruleIds.add("DE_DASH");
+    ruleIds.add("DE_WORD_COHERENCY");
+    ruleIds.add("DE_WIEDER_VS_WIDER");
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java
new file mode 100644
index 0000000..e48fb6a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Icelandic.java
@@ -0,0 +1,86 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.language;
+
+/**
+ * @author Anton Karl Ingason
+ */
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.xx.DemoTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+public class Icelandic extends Language {
+
+  private static final String[] COUNTRIES = { "IS" };
+  private final Tagger posTagger = new DemoTagger();
+  private final SentenceTokenizer sentenceSplitter = new SRXSentenceTokenizer("is");
+
+  @Override
+  public String getName() {
+    return "Icelandic";
+  }
+
+  @Override
+  public String getShortName() {
+    return "is";
+  }
+
+  @Override
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return posTagger;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceSplitter;
+  }
+
+  @Override
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Anton Karl Ingason")};
+  }
+
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java
new file mode 100644
index 0000000..986b7f5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Italian.java
@@ -0,0 +1,74 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.it.ItalianTagger;
+
+public class Italian extends Language {
+
+  // Country codes handed out by getCountryVariants().
+  private static final String[] COUNTRIES = {"IT", "CH"};
+
+  // Created once per instance and returned by getTagger().
+  private final Tagger posTagger = new ItalianTagger();
+
+  public String getName() {
+    return "Italian";
+  }
+
+  public String getShortName() {
+    return "it";
+  }
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return posTagger;
+  }
+
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Paolo Bianchini");
+    return new Contributor[] { maintainer };
+  }
+
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java
new file mode 100644
index 0000000..201a8b5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/LanguageBuilder.java
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.io.File;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Create a language by specifying the language's XML rule file.
+ *
+ * @author Daniel Naber
+ */
+public class LanguageBuilder {
+
+  private LanguageBuilder() {
+  }
+
+  /**
+   * Builds a Language from an XML rule file named <tt>rules-xx-language.xml</tt>, e.g.
+   * <tt>rules-de-German.xml</tt> gives short name "de" and name "German". A null file raises
+   * NullPointerException; any other file name pattern raises RuleFilenameException.
+   */
+  public static Language makeLanguage(final File file) {
+    if (file == null) {
+      throw new NullPointerException("file argument cannot be null");
+    }
+    if (!file.getName().endsWith(".xml")) {
+      throw new RuleFilenameException(file);
+    }
+    final String[] parts = file.getName().split("-");
+    if (parts.length != 3 || !parts[0].equals("rules") || parts[1].length() != 2) {
+      throw new RuleFilenameException(file);
+    }
+    // parts[2] is "<language>.xml"; getName() removes only that trailing extension.
+    final Language newLanguage = new Language() {
+      public Locale getLocale() {
+        return new Locale(getShortName());
+      }
+      public Contributor[] getMaintainers() {
+        return null;
+      }
+      public String getShortName() {
+        return parts[1];
+      }
+      public String[] getCountryVariants() {
+        return new String[] {""};
+      }
+      public String getName() {
+        return parts[2].substring(0, parts[2].length() - ".xml".length());
+      }
+      public Set<String> getRelevantRuleIDs() {
+        return null;
+      }
+      public String getRuleFileName() {
+        return file.getAbsolutePath();
+      }
+    };
+    return newLanguage;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java
new file mode 100644
index 0000000..6401195
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Lithuanian.java
@@ -0,0 +1,70 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.xx.DemoTagger;
+
+public class Lithuanian extends Language {
+
+  private static final String[] COUNTRIES = {"LT"};
+  // Built once and reused, so getTagger() does not allocate on every call.
+  private final Tagger tagger = new DemoTagger();
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String getName() {
+    return "Lithuanian";
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public String getShortName() {
+    return "lt";
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Mantas Kriaučiūnas")};
+  }
+
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ids = new HashSet<String>();
+    ids.add("COMMA_PARENTHESIS_WHITESPACE");
+    ids.add("DOUBLE_PUNCTUATION");
+    ids.add("UNPAIRED_BRACKETS");
+    ids.add("UPPERCASE_SENTENCE_START");
+    ids.add("WHITESPACE_RULE");
+    return ids;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java
new file mode 100644
index 0000000..f15ca5c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Malayalam.java
@@ -0,0 +1,86 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.ml.MalayalamTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.ml.MalayalamWordTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+
+public class Malayalam extends Language {
+
+  private static final String[] COUNTRIES = {"IN"};
+
+  private final Tagger posTagger = new MalayalamTagger();
+  private final SentenceTokenizer sentenceSplitter = new SRXSentenceTokenizer("en"); // NOTE(review): "en", not "ml" - confirm intended
+  private final Tokenizer wordSplitter = new MalayalamWordTokenizer();
+
+  public final String getName() {
+    return "Malayalam";
+  }
+
+  public final String getShortName() {
+    return "ml";
+  }
+
+  public final Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public final Tagger getTagger() {
+    return posTagger;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wordSplitter;
+  }
+
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceSplitter;
+  }
+
+  public final Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Jithesh.V.S");
+    return new Contributor[] {maintainer};
+  }
+
+  public final Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    // no Malayalam-specific rule IDs are registered here
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java
new file mode 100644
index 0000000..13b4faf
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Polish.java
@@ -0,0 +1,116 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.Unifier;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.pl.PolishSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.pl.PolishHybridDisambiguator;
+import de.danielnaber.languagetool.tagging.pl.PolishTagger;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+
+public class Polish extends Language {
+
+  private static final String[] COUNTRIES = {"PL"};
+  private static final Unifier POLISH_UNIFIER = new Unifier();
+  private static final Unifier POLISH_DISAMB_UNIFIER = new Unifier();
+
+  private final Tagger posTagger = new PolishTagger();
+  private final SentenceTokenizer sentenceSplitter = new SRXSentenceTokenizer("pl");
+  private final Disambiguator hybridDisambiguator = new PolishHybridDisambiguator();
+  private final Synthesizer formSynthesizer = new PolishSynthesizer();
+
+  @Override
+  public String getName() {
+    return "Polish";
+  }
+
+  @Override
+  public String getShortName() {
+    return "pl";
+  }
+
+  @Override
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  @Override
+  public Tagger getTagger() {
+    return posTagger;
+  }
+
+  @Override
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceSplitter;
+  }
+
+  @Override
+  public Disambiguator getDisambiguator() {
+    return hybridDisambiguator;
+  }
+
+  @Override
+  public Synthesizer getSynthesizer() {
+    return formSynthesizer;
+  }
+
+  public Unifier getUnifier() {
+    return POLISH_UNIFIER;
+  }
+
+  public Unifier getDisambiguationUnifier() {
+    return POLISH_DISAMB_UNIFIER;
+  }
+
+  @Override
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Marcin Miłkowski")};
+  }
+
+  @Override
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    // rules that exist only for Polish:
+    ruleIds.add("PL_UNPAIRED_BRACKETS");
+    ruleIds.add("PL_WORD_REPEAT");
+    ruleIds.add("PL_COMPOUNDS");
+    ruleIds.add("PL_SIMPLE_REPLACE");
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java
new file mode 100644
index 0000000..96d6a6b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Romanian.java
@@ -0,0 +1,112 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.ro.CompoundRule;
+import de.danielnaber.languagetool.rules.ro.SimpleReplaceRule;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.ro.RomanianSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.ro.RomanianRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.ro.RomanianTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tokenizers.ro.RomanianWordTokenizer;
+
+/**
+ *
+ * @author Ionuț Păduraru
+ * @since 24.02.2009 22:18:21
+ */
+public class Romanian extends Language {
+
+  private static final String[] COUNTRIES = { "RO" };
+
+  private final Tagger tagger = new RomanianTagger();
+  private final Synthesizer synthesizer = new RomanianSynthesizer();
+  private final Disambiguator disambiguator = new RomanianRuleDisambiguator();
+  private final Tokenizer wdTokenizer = new RomanianWordTokenizer();
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("ro");
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  public String getName() {
+    return "Romanian";
+  }
+
+  public String getShortName() {
+    return "ro";
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  public Contributor[] getMaintainers() {
+    final Contributor contributor = new Contributor("Ionuț Păduraru");
+    contributor.setUrl("http://www.archeus.ro");
+    return new Contributor[] { contributor };
+  }
+
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ids = new HashSet<String>();
+    // generic rules (each ID is listed once; the set would drop duplicates anyway):
+    ids.add("COMMA_PARENTHESIS_WHITESPACE");
+    ids.add("DOUBLE_PUNCTUATION");
+    ids.add("UPPERCASE_SENTENCE_START");
+    ids.add("WHITESPACE_RULE");
+    ids.add("UNPAIRED_BRACKETS");
+    ids.add("WORD_REPEAT_RULE");
+    // specific to Romanian:
+    ids.add(SimpleReplaceRule.ROMANIAN_SIMPLE_REPLACE_RULE);
+    ids.add(CompoundRule.ROMANIAN_COMPOUND_RULE);
+
+    return ids;
+  }
+
+  public final Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  public final Disambiguator getDisambiguator() {
+    return disambiguator;
+  }
+
+  public final Tokenizer getWordTokenizer() {
+    return wdTokenizer;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java
new file mode 100644
index 0000000..715bdc9
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/RuleFilenameException.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.io.File;
+
+/**
+ * Thrown if external rule filename doesn't match the required format.
+ *
+ * @author Daniel Naber
+ */
+public class RuleFilenameException extends RuntimeException {
+
+ /**
+ * Version identifier used by Java serialization.
+ */
+ private static final long serialVersionUID = 6642163394764392897L;
+ /** Builds the explanatory message, ending with the name of the rejected file. */
+ public RuleFilenameException(File file) {
+ super("Rule file must be named rules-<xx>-<lang>.xml (<xx> = language code, " +
+ "<lang> = language name),\n" +
+ "for example: rules-en-English.xml\n" +
+ "Current name: " + file.getName());
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java
new file mode 100644
index 0000000..8491d65
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Russian.java
@@ -0,0 +1,114 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.Unifier;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.ru.RussianSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.ru.RussianRuleDisambiguator;
+import de.danielnaber.languagetool.tagging.ru.RussianTagger;
+//import de.danielnaber.languagetool.tokenizers.Tokenizer;
+//import de.danielnaber.languagetool.tokenizers.ru.RussianWordTokenizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer; // new Tokenizer
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+// import de.danielnaber.languagetool.tokenizers.ru.RussianSentenceTokenizer; // old Tokenizer
+
+
+/**
+ * Language definition for Russian: tagger, disambiguator, synthesizer, SRX sentence splitting.
+ */
+public class Russian extends Language {
+
+  private static final String[] COUNTRIES = {"RU"};
+  private static final Unifier RUSSIAN_UNIFIER = new Unifier();
+
+  private final Tagger posTagger = new RussianTagger();
+  private final Disambiguator ruleDisambiguator = new RussianRuleDisambiguator();
+  private final Synthesizer formSynthesizer = new RussianSynthesizer();
+  // SRX-based sentence splitting; it replaced the older RussianSentenceTokenizer.
+  private final SentenceTokenizer sentenceSplitter = new SRXSentenceTokenizer("ru");
+
+  public String getName() {
+    return "Russian";
+  }
+
+  public String getShortName() {
+    return "ru";
+  }
+
+  /** Returns a Locale built from the short language code. */
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  /** Returns the country variants of this language (only "RU"). */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return posTagger;
+  }
+
+  public Disambiguator getDisambiguator() {
+    return ruleDisambiguator;
+  }
+
+  public Synthesizer getSynthesizer() {
+    return formSynthesizer;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceSplitter;
+  }
+
+  /** Returns the single Unifier instance shared by all Russian objects. */
+  public Unifier getUnifier() {
+    return RUSSIAN_UNIFIER;
+  }
+
+  /** Returns the maintainers of the Russian support. */
+  public Contributor[] getMaintainers() {
+    return new Contributor[] {new Contributor("Yakov Reztsov")};
+  }
+
+  /** Returns the IDs of the rules relevant for Russian: generic ones plus the RU_* rules. */
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    // rules that exist only for Russian:
+    ruleIds.add("RU_UNPAIRED_BRACKETS");
+    ruleIds.add("RU_COMPOUNDS");
+    ruleIds.add("RU_SIMPLE_REPLACE");
+    return ruleIds;
+  }
+
+} \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java
new file mode 100644
index 0000000..eecb54b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovak.java
@@ -0,0 +1,93 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.sk.SlovakTagger;
+import de.danielnaber.languagetool.synthesis.sk.SlovakSynthesizer;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+public class Slovak extends Language {
+
+  // Country codes handed out by getCountryVariants().
+  private static final String[] COUNTRIES = {"SK"};
+
+  // Created once per instance and returned by the getters below.
+  private final SentenceTokenizer sentenceSplitter = new SRXSentenceTokenizer("sk");
+  private final Tagger posTagger = new SlovakTagger();
+  private final Synthesizer formSynthesizer = new SlovakSynthesizer();
+
+  public String getName() {
+    return "Slovak";
+  }
+
+  public String getShortName() {
+    return "sk";
+  }
+
+  public Locale getLocale() {
+    return new Locale(getShortName());
+  }
+
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  public Tagger getTagger() {
+    return posTagger;
+  }
+
+  @Override
+  public Synthesizer getSynthesizer() {
+    return formSynthesizer;
+  }
+
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceSplitter;
+  }
+
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Zdenko Podobný");
+    maintainer.setUrl("http://sk-spell.sk.cx");
+    return new Contributor[] { maintainer };
+  }
+
+  public Set<String> getRelevantRuleIDs() {
+    final Set<String> ruleIds = new HashSet<String>();
+    ruleIds.add("COMMA_PARENTHESIS_WHITESPACE");
+    ruleIds.add("DOUBLE_PUNCTUATION");
+    ruleIds.add("UNPAIRED_BRACKETS");
+    ruleIds.add("UPPERCASE_SENTENCE_START");
+    ruleIds.add("WORD_REPEAT_RULE");
+    ruleIds.add("WHITESPACE_RULE");
+    // rules that exist only for Slovak:
+    ruleIds.add("SK_COMPOUNDS");
+    ruleIds.add("SK_VES");
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java
new file mode 100644
index 0000000..cc945f3
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Slovenian.java
@@ -0,0 +1,75 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ * Language definition for Slovenian (short name "sl", country variant "SI").
+ * Only a sentence tokenizer is supplied here; no tagger or synthesizer is
+ * defined in this class.
+ */
+public class Slovenian extends Language {
+
+  /** Country codes returned by {@link #getCountryVariants()}. */
+  private static final String[] COUNTRIES = {"SI"};
+
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("sl");
+
+  /** @return the human-readable language name */
+  public String getName() {
+    return "Slovenian";
+  }
+
+  /** @return the short language code */
+  public String getShortName() {
+    return "sl";
+  }
+
+  /** @return a new locale built from the short language code */
+  public Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the country codes this language is offered for */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  /** @return the Slovenian sentence tokenizer created with this instance */
+  public SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  /** @return a one-element array holding the maintainer */
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Martin Srebotnjak");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a fresh, modifiable set with the IDs of the rules listed below */
+  public Set<String> getRelevantRuleIDs() {
+    // specific to Slovenian: none
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java
new file mode 100644
index 0000000..ba646d6
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Spanish.java
@@ -0,0 +1,94 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.es.SpanishSynthesizer;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.es.SpanishTagger;
+import de.danielnaber.languagetool.tokenizers.SRXSentenceTokenizer;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ * Language definition for Spanish (short name "es"). Provides the tagger,
+ * synthesizer and sentence tokenizer used for Spanish and the set of rule
+ * IDs reported by {@link #getRelevantRuleIDs()}.
+ */
+public class Spanish extends Language {
+
+  /**
+   * Country codes returned by {@link #getCountryVariants()}; the list
+   * includes an empty string entry.
+   */
+  private static final String[] COUNTRIES = {
+    "ES", "", "MX", "GT", "CR", "PA", "DO",
+    "VE", "PE", "AR", "EC", "CL", "UY", "PY",
+    "BO", "SV", "HN", "NI", "PR", "US", "CU"
+  };
+
+  private final SentenceTokenizer sentenceTokenizer = new SRXSentenceTokenizer("es");
+  private final Synthesizer synthesizer = new SpanishSynthesizer();
+  private final Tagger tagger = new SpanishTagger();
+
+  /** @return the human-readable language name */
+  public String getName() {
+    return "Spanish";
+  }
+
+  /** @return the short language code */
+  public String getShortName() {
+    return "es";
+  }
+
+  /** @return a new locale built from the short language code */
+  public Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the country codes this language is offered for */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  /** @return the Spanish sentence tokenizer created with this instance */
+  public final SentenceTokenizer getSentenceTokenizer() {
+    return sentenceTokenizer;
+  }
+
+  /** @return the Spanish tagger created with this instance */
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  /** @return the Spanish synthesizer created with this instance */
+  public final Synthesizer getSynthesizer() {
+    return synthesizer;
+  }
+
+  /** @return a one-element array holding the maintainer and his URL */
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Juan Martorell");
+    maintainer.setUrl("http://languagetool-es.blogspot.com/");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a fresh, modifiable set with the IDs of the rules listed below */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE"
+      // specific to Spanish:
+      // "EL_WITH_FEM" (not added)
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java
new file mode 100644
index 0000000..1b99f9a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Swedish.java
@@ -0,0 +1,75 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.sv.SwedishTagger;
+
+/**
+ * Language definition for Swedish (short name "sv", country variants "SE"
+ * and "FI"). Supplies the Swedish tagger and the set of rule IDs reported
+ * by {@link #getRelevantRuleIDs()}.
+ */
+public class Swedish extends Language {
+
+  /** Country codes returned by {@link #getCountryVariants()}. */
+  private static final String[] COUNTRIES = {"SE", "FI"};
+
+  private final Tagger tagger = new SwedishTagger();
+
+  /** @return the human-readable language name */
+  public final String getName() {
+    return "Swedish";
+  }
+
+  /** @return the short language code */
+  public final String getShortName() {
+    return "sv";
+  }
+
+  /** @return a new locale built from the short language code */
+  public final Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the country codes this language is offered for */
+  @Override
+  public final String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  /** @return the Swedish tagger created with this instance */
+  public final Tagger getTagger() {
+    return tagger;
+  }
+
+  /** @return a one-element array holding the maintainer */
+  public final Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Niklas Johansson");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a fresh, modifiable set with the IDs of the rules listed below */
+  public final Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UNPAIRED_BRACKETS",
+      "UPPERCASE_SENTENCE_START",
+      "WORD_REPEAT_RULE",
+      "WHITESPACE_RULE",
+      // specific to Swedish:
+      "SV_COMPOUNDS"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java
new file mode 100644
index 0000000..c426100
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/language/Ukrainian.java
@@ -0,0 +1,73 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.uk.UkrainianTagger;
+
+/**
+ * Language definition for Ukrainian (short name "uk", country variant "UA").
+ * Supplies the Ukrainian tagger and the set of rule IDs reported by
+ * {@link #getRelevantRuleIDs()}.
+ */
+public class Ukrainian extends Language {
+
+  /** Country codes returned by {@link #getCountryVariants()}. */
+  private static final String[] COUNTRIES = {"UA"};
+
+  private final Tagger tagger = new UkrainianTagger();
+
+  /** @return the human-readable language name */
+  public String getName() {
+    return "Ukrainian";
+  }
+
+  /** @return the short language code */
+  public String getShortName() {
+    return "uk";
+  }
+
+  /** @return a new locale built from the short language code */
+  public Locale getLocale() {
+    final String code = getShortName();
+    return new Locale(code);
+  }
+
+  /** @return the country codes this language is offered for */
+  @Override
+  public String[] getCountryVariants() {
+    return COUNTRIES;
+  }
+
+  /** @return the Ukrainian tagger created with this instance */
+  public Tagger getTagger() {
+    return tagger;
+  }
+
+  /** @return a one-element array holding the maintainer */
+  public Contributor[] getMaintainers() {
+    final Contributor maintainer = new Contributor("Andriy Rysin");
+    return new Contributor[] {maintainer};
+  }
+
+  /** @return a fresh, modifiable set with the IDs of the rules listed below */
+  public Set<String> getRelevantRuleIDs() {
+    final String[] relevant = {
+      "COMMA_PARENTHESIS_WHITESPACE",
+      "DOUBLE_PUNCTUATION",
+      "UPPERCASE_SENTENCE_START",
+      "WHITESPACE_RULE",
+      // specific to Ukrainian:
+      "UK_SIMPLE_REPLACE"
+    };
+    final Set<String> ruleIds = new HashSet<String>();
+    for (final String ruleId : relevant) {
+      ruleIds.add(ruleId);
+    }
+    return ruleIds;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/ConfigThread.java b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/ConfigThread.java
new file mode 100644
index 0000000..52aae8b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/ConfigThread.java
@@ -0,0 +1,78 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.openoffice;
+
+import java.util.Set;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.gui.Configuration;
+import de.danielnaber.languagetool.gui.ConfigurationDialog;
+
+/**
+ * A thread that shows the configuration dialog which lets the
+ * user enable/disable rules.
+ *
+ * @author Marcin Miłkowski
+ * @author Daniel Naber
+ */
+class ConfigThread extends Thread {
+
+  private final Language docLanguage;
+  private final Configuration config;
+  private final Main mainThread;
+
+  private final ConfigurationDialog cfgDialog;
+
+  /**
+   * Prepares the dialog and pre-populates it from the stored configuration.
+   *
+   * @param docLanguage language used to build the rule list shown in the dialog
+   * @param config configuration that is read here and written back in {@link #run()}
+   * @param main the OOo integration object whose document is reset after
+   *        saving; may be null, in which case no reset is done
+   */
+  ConfigThread(final Language docLanguage, final Configuration config,
+      final Main main) {
+    this.docLanguage = docLanguage;
+    this.config = config;
+    this.mainThread = main;
+    this.cfgDialog = new ConfigurationDialog(null, true);
+    this.cfgDialog.setDisabledRules(config.getDisabledRuleIds());
+    this.cfgDialog.setEnabledRules(config.getEnabledRuleIds());
+    this.cfgDialog.setDisabledCategories(config.getDisabledCategoryNames());
+    this.cfgDialog.setMotherTongue(config.getMotherTongue());
+  }
+
+  /** @return the disabled rule IDs as currently held by the dialog */
+  public Set<String> getDisabledRuleIds() {
+    return cfgDialog.getDisabledRuleIds();
+  }
+
+  /**
+   * Shows the dialog with all rules of a freshly created checker, copies the
+   * dialog's settings into the configuration, saves it and asks the main
+   * object (if any) to reset the document. Any Throwable is reported through
+   * {@code Main.showError}.
+   */
+  public void run() {
+    try {
+      final JLanguageTool ruleSource =
+          new JLanguageTool(docLanguage, cfgDialog.getMotherTongue());
+      ruleSource.activateDefaultPatternRules();
+      ruleSource.activateDefaultFalseFriendRules();
+      cfgDialog.show(ruleSource.getAllRules());
+
+      // copy the user's choices back into the persistent configuration
+      config.setDisabledRuleIds(cfgDialog.getDisabledRuleIds());
+      config.setEnabledRuleIds(cfgDialog.getEnabledRuleIds());
+      config.setDisabledCategoryNames(cfgDialog.getDisabledCategoryNames());
+      config.setMotherTongue(cfgDialog.getMotherTongue());
+      config.saveConfiguration();
+
+      if (mainThread != null) {
+        mainThread.resetDocument();
+      }
+    } catch (Throwable problem) {
+      Main.showError(problem);
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/Main.java b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/Main.java
new file mode 100644
index 0000000..3eaecda
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/Main.java
@@ -0,0 +1,760 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.openoffice;
+
+/** OpenOffice 3.x Integration
+ *
+ * @author Marcin Miłkowski
+ */
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import javax.swing.JOptionPane;
+import javax.swing.UIManager;
+
+import com.sun.star.awt.XWindow;
+import com.sun.star.awt.XWindowPeer;
+import com.sun.star.beans.PropertyValue;
+import com.sun.star.beans.XPropertySet;
+import com.sun.star.frame.XDesktop;
+import com.sun.star.frame.XModel;
+import com.sun.star.lang.IllegalArgumentException;
+import com.sun.star.lang.Locale;
+import com.sun.star.lang.XComponent;
+import com.sun.star.lang.XMultiComponentFactory;
+import com.sun.star.lang.XServiceDisplayName;
+import com.sun.star.lang.XServiceInfo;
+import com.sun.star.lang.XSingleComponentFactory;
+import com.sun.star.lib.uno.helper.Factory;
+import com.sun.star.lib.uno.helper.WeakBase;
+import com.sun.star.linguistic2.ProofreadingResult;
+import com.sun.star.linguistic2.SingleProofreadingError;
+import com.sun.star.linguistic2.XLinguServiceEventBroadcaster;
+import com.sun.star.linguistic2.XLinguServiceEventListener;
+import com.sun.star.linguistic2.XProofreader;
+import com.sun.star.registry.XRegistryKey;
+import com.sun.star.task.XJobExecutor;
+import com.sun.star.text.XTextViewCursor;
+import com.sun.star.text.XTextViewCursorSupplier;
+import com.sun.star.uno.UnoRuntime;
+import com.sun.star.uno.XComponentContext;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.gui.Configuration;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+public class Main extends WeakBase implements XJobExecutor,
+ XServiceDisplayName, XServiceInfo, XProofreader,
+ XLinguServiceEventBroadcaster {
+
+ private Configuration config;
+ private JLanguageTool langTool;
+ private Language docLanguage;
+
+ private String docID;
+
+ /*
+ * Rules disabled using the config dialog box rather than Spelling dialog box
+ * or the context menu.
+ */
+ private Set<String> disabledRules;
+
+ private Set<String> disabledRulesUI;
+
+ private List<XLinguServiceEventListener> xEventListeners;
+
+ /**
+ * Make another instance of JLanguageTool and assign it to langTool if true.
+ */
+ private boolean recheck;
+
+ /**
+ * Sentence tokenization-related members.
+ */
+
+ private String currentPara;
+ private List<String> tokenizedSentences;
+ private int position;
+ private List<RuleMatch> paragraphMatches;
+
+ /**
+ * Service names: the one required by the OOo API and our own class name.
+ */
+ private static final String[] SERVICE_NAMES = {
+ "com.sun.star.linguistic2.Proofreader",
+ "de.danielnaber.languagetool.openoffice.Main" };
+
+ // use a different name than the stand-alone version to avoid conflicts:
+ private static final String CONFIG_FILE = ".languagetool-ooo.cfg";
+
+ private static final ResourceBundle MESSAGES = JLanguageTool
+ .getMessageBundle();
+
+ private XComponentContext xContext;
+
+ /**
+  * Stores the component context, loads the configuration file from the
+  * user's home directory and initialises the rule sets and listener list.
+  * Any Throwable raised on the way is reported through showError instead of
+  * being propagated.
+  *
+  * @param xCompContext the UNO component context to keep
+  */
+ public Main(final XComponentContext xCompContext) {
+   try {
+     changeContext(xCompContext);
+     config = new Configuration(getHomeDir(), CONFIG_FILE);
+     final Set<String> stored = config.getDisabledRuleIds();
+     disabledRules = (stored == null) ? new HashSet<String>() : stored;
+     disabledRulesUI = new HashSet<String>(disabledRules);
+     xEventListeners = new ArrayList<XLinguServiceEventListener>();
+   } catch (final Throwable problem) {
+     showError(problem);
+   }
+ }
+
+ /**
+  * Replaces the stored UNO component context.
+  *
+  * @param xCompContext the context to use from now on
+  */
+ public final void changeContext(final XComponentContext xCompContext) {
+   this.xContext = xCompContext;
+ }
+
+ /**
+  * Asks the desktop service for the current component.
+  *
+  * @return the desktop's current component, or null after reporting the
+  *         problem through showError if anything goes wrong
+  */
+ private XComponent getxComponent() {
+   try {
+     final XMultiComponentFactory serviceManager = xContext.getServiceManager();
+     final Object desktopService = serviceManager.createInstanceWithContext(
+         "com.sun.star.frame.Desktop", xContext);
+     final XDesktop desktop = (XDesktop) UnoRuntime.queryInterface(
+         XDesktop.class, desktopService);
+     return desktop.getCurrentComponent();
+   } catch (final Throwable problem) {
+     showError(problem);
+     return null;
+   }
+ }
+
+ /**
+ * Determines the language under the view cursor of the current document.
+ * Used for opening the configuration dialog.
+ *
+ * @return the language under the visible cursor; Language.ENGLISH when
+ * there is no current component or the cursor has no "CharLocale"
+ * value; null when the language is not among Language.LANGUAGES
+ * (a message dialog is shown first) or when an error occurred
+ * (reported through showError).
+ */
+ private Language getLanguage() {
+ final XComponent xComponent = getxComponent();
+ if (xComponent == null) {
+ return Language.ENGLISH; // for testing with local main() method only
+ }
+ final Locale charLocale;
+ final XPropertySet xCursorProps;
+ try {
+ final XModel model = (XModel) UnoRuntime.queryInterface(XModel.class,
+ xComponent);
+ final XTextViewCursorSupplier xViewCursorSupplier = (XTextViewCursorSupplier) UnoRuntime
+ .queryInterface(XTextViewCursorSupplier.class, model
+ .getCurrentController());
+ final XTextViewCursor xCursor = xViewCursorSupplier.getViewCursor();
+ if (xCursor.isCollapsed()) { // no text selection
+ xCursorProps = (XPropertySet) UnoRuntime.queryInterface(
+ XPropertySet.class, xCursor);
+ } else { // text is selected, need to create another cursor
+ // as multiple languages can occur here - we care only
+ // about character under the cursor, which might be wrong
+ // but it applies only to the checking dialog to be removed
+ xCursorProps = (XPropertySet) UnoRuntime.queryInterface(
+ XPropertySet.class, xCursor.getText().createTextCursorByRange(
+ xCursor.getStart()));
+ }
+ final Object obj = xCursorProps.getPropertyValue("CharLocale");
+ if (obj == null) {
+ return Language.ENGLISH; // fallback
+ }
+ charLocale = (Locale) obj;
+ // only the language code of the locale is compared; the country is ignored
+ boolean langIsSupported = false;
+ for (Language element : Language.LANGUAGES) {
+ if (element.getShortName().equals(charLocale.Language)) {
+ langIsSupported = true;
+ break;
+ }
+ }
+ if (!langIsSupported) {
+ // FIXME: i18n
+ JOptionPane.showMessageDialog(null,
+ "Error: Sorry, the document language '" + charLocale.Language
+ + "' is not supported by LanguageTool.");
+ return null;
+ }
+ } catch (final Throwable t) {
+ showError(t);
+ return null;
+ }
+ // reached only when the loop above found a matching short name
+ return Language.getLanguageForShortName(charLocale.Language);
+ }
+
+ /**
+  * Runs the grammar checker on paragraph text. The result object is filled
+  * with the values supplied by the caller and then handed to
+  * doGrammarCheckingInternal; if anything is thrown, the error is reported
+  * through showError and the result is returned as filled so far.
+  *
+  * @param docID - document ID
+  * @param paraText - paragraph text
+  * @param locale Locale - the text Locale
+  * @param startOfSentencePos start of sentence position
+  * @param nSuggestedBehindEndOfSentencePosition end of sentence position
+  *        (not used by this method)
+  * @param props - properties
+  * @return ProofreadingResult containing the results of the check.
+  */
+ public final ProofreadingResult doProofreading(final String docID,
+     final String paraText, final Locale locale, final int startOfSentencePos,
+     final int nSuggestedBehindEndOfSentencePosition,
+     final PropertyValue[] props) {
+   final ProofreadingResult result = new ProofreadingResult();
+   try {
+     // echo the request back to the caller
+     result.aDocumentIdentifier = docID;
+     result.aText = paraText;
+     result.aLocale = locale;
+     result.nStartOfSentencePosition = startOfSentencePos;
+     result.aProperties = props;
+     result.xProofreader = this;
+     return doGrammarCheckingInternal(paraText, locale, result);
+   } catch (final Throwable problem) {
+     showError(problem);
+     return result;
+   }
+ }
+
+ /**
+ * Checks one sentence of the given paragraph and stores the sentence
+ * boundaries and the found errors in paRes.
+ *
+ * Nothing is checked (paRes is returned unchanged) when the text is empty
+ * or the locale's language is not supported. Otherwise the cached
+ * JLanguageTool is rebuilt if the language changed, no instance exists yet
+ * or a recheck was requested; the rule settings from the configuration are
+ * applied; the sentence is picked via getSentence; and sentence-level
+ * matches are merged with paragraph-level matches from checkParaRules.
+ *
+ * @param paraText paragraph text
+ * @param locale locale of the text; only its Language field is used here
+ * @param paRes result object; its nStartOfSentencePosition is read and
+ * then overwritten
+ * @return paRes
+ */
+ synchronized private ProofreadingResult doGrammarCheckingInternal(
+ final String paraText, final Locale locale, final ProofreadingResult paRes) {
+
+ if (!StringTools.isEmpty(paraText)
+ && hasLocale(locale)) {
+ // caching the instance of LT
+ if (!Language.getLanguageForShortName(locale.Language).equals(
+ docLanguage)
+ || langTool == null || recheck) {
+ docLanguage = Language.getLanguageForShortName(locale.Language);
+ if (docLanguage == null) {
+ return paRes;
+ }
+ try {
+ langTool = new JLanguageTool(docLanguage, config.getMotherTongue());
+ langTool.activateDefaultPatternRules();
+ langTool.activateDefaultFalseFriendRules();
+ recheck = false;
+ } catch (final Throwable t) {
+ // NOTE(review): langTool may still be null (or half-initialised) after
+ // this; the code below uses it without a further check
+ showError(t);
+ }
+ }
+
+ // the configured rule settings are re-applied on every call
+ if (config.getDisabledRuleIds() != null) {
+ for (final String id : config.getDisabledRuleIds()) {
+ langTool.disableRule(id);
+ }
+ }
+ final Set<String> disabledCategories = config
+ .getDisabledCategoryNames();
+ if (disabledCategories != null) {
+ for (final String categoryName : disabledCategories) {
+ langTool.disableCategory(categoryName);
+ }
+ }
+ final Set<String> enabledRules = config.getEnabledRuleIds();
+ if (enabledRules != null) {
+ for (String ruleName : enabledRules) {
+ langTool.enableDefaultOffRule(ruleName);
+ langTool.enableRule(ruleName);
+ }
+ }
+ try {
+ // getSentence also sets the field 'position' to the sentence's start
+ final String sentence = getSentence(paraText,
+ paRes.nStartOfSentencePosition);
+ paRes.nStartOfSentencePosition = position;
+ paRes.nStartOfNextSentencePosition = position + sentence.length();
+ paRes.nBehindEndOfSentencePosition = paRes.nStartOfNextSentencePosition;
+ if (!StringTools.isEmpty(sentence)) {
+ final List<RuleMatch> ruleMatches = langTool.check(sentence, false,
+ JLanguageTool.paragraphHandling.ONLYNONPARA);
+ final SingleProofreadingError[] pErrors = checkParaRules(paraText,
+ locale, paRes.nStartOfSentencePosition,
+ paRes.nStartOfNextSentencePosition, paRes.aDocumentIdentifier);
+ int pErrorCount = 0;
+ if (pErrors != null) {
+ pErrorCount = pErrors.length;
+ }
+ if (!ruleMatches.isEmpty()) {
+ final SingleProofreadingError[] errorArray = new SingleProofreadingError[ruleMatches
+ .size()
+ + pErrorCount];
+ int i = 0;
+ // sentence matches are relative to the sentence, so they are shifted
+ // by the sentence's start position within the paragraph
+ for (final RuleMatch myRuleMatch : ruleMatches) {
+ errorArray[i] = createOOoError(myRuleMatch, paRes.nStartOfSentencePosition);
+ i++;
+ }
+ // add para matches
+ if (pErrors != null) {
+ for (SingleProofreadingError paraError : pErrors) {
+ if (paraError != null) {
+ errorArray[i] = paraError;
+ i++;
+ }
+ }
+ }
+ Arrays.sort(errorArray, new ErrorPositionComparator());
+ paRes.aErrors = errorArray;
+
+ } else {
+ if (pErrors != null) {
+ paRes.aErrors = pErrors;
+ }
+ }
+ }
+ } catch (final Throwable t) {
+ showError(t);
+ // on failure, report the whole paragraph as one sentence
+ paRes.nBehindEndOfSentencePosition = paraText.length();
+ }
+ }
+ return paRes;
+ }
+
+ /**
+ * Returns the sentence of the paragraph that contains startPos and stores
+ * the start offset of that sentence in the field 'position'.
+ *
+ * If paraText equals the cached paragraph, the cached sentence list is
+ * searched. Otherwise the paragraph is tokenized anew, 'position' is set
+ * to 0 and the first sentence is returned; startPos is not consulted in
+ * that case.
+ *
+ * @param paraText paragraph text
+ * @param startPos offset within the paragraph
+ * @return the selected sentence, or an empty string if there is none
+ */
+ synchronized private String getSentence(final String paraText,
+ final int startPos) {
+ if (paraText.equals(currentPara) && tokenizedSentences != null) {
+ int i = 0;
+ // index is the offset of the last character of the sentences summed so far
+ int index = -1;
+ while (index < startPos && i < tokenizedSentences.size()) {
+ index += tokenizedSentences.get(i).length();
+ if (index < startPos) {
+ i++;
+ }
+ }
+ // index + 1 is the offset just behind sentence i (or behind the last
+ // sentence if startPos lies beyond all of them)
+ position = index + 1;
+ if (i < tokenizedSentences.size()) {
+ // step back to the first character of sentence i
+ position -= tokenizedSentences.get(i).length();
+ return tokenizedSentences.get(i);
+ }
+ return "";
+ }
+ // new paragraph: refresh the cache
+ currentPara = paraText;
+ tokenizedSentences = langTool.sentenceTokenize(paraText);
+ position = 0;
+ if (!tokenizedSentences.isEmpty()) {
+ return tokenizedSentences.get(0);
+ }
+ return "";
+ }
+
+ /**
+ * Returns the paragraph-level matches that fall inside the given range.
+ *
+ * When startPos is 0 the whole paragraph is checked with paragraph-level
+ * handling and the matches are cached together with the document ID; for
+ * any other startPos the cached matches are reused.
+ *
+ * @param paraText paragraph text
+ * @param locale not used by this method
+ * @param startPos start of the range (inclusive)
+ * @param endPos end of the range (exclusive)
+ * @param docID document ID; cached matches are used only if it equals the
+ * ID stored with them
+ * @return the selected errors sorted with ErrorPositionComparator, or null
+ * if there are none
+ */
+ synchronized private SingleProofreadingError[] checkParaRules(
+ final String paraText, final Locale locale, final int startPos,
+ final int endPos, final String docID) {
+ if (startPos == 0) {
+ try {
+ paragraphMatches = langTool.check(paraText, false,
+ JLanguageTool.paragraphHandling.ONLYPARA);
+ this.docID = docID;
+ } catch (final Throwable t) {
+ showError(t);
+ }
+ }
+ if (paragraphMatches != null && !paragraphMatches.isEmpty()
+ && docID.equals(this.docID)) {
+ final List<SingleProofreadingError> errorList = new ArrayList<SingleProofreadingError>(
+ paragraphMatches.size());
+ for (final RuleMatch myRuleMatch : paragraphMatches) {
+ final int startErrPos = myRuleMatch.getFromPos();
+ final int endErrPos = myRuleMatch.getToPos();
+ // keep only matches whose start and end both lie in [startPos, endPos)
+ if (startErrPos >= startPos && startErrPos < endPos
+ && endErrPos >= startPos && endErrPos < endPos) {
+ // offset 0: these positions are already relative to the paragraph
+ errorList.add(createOOoError(myRuleMatch, 0));
+ }
+ }
+ if (!errorList.isEmpty()) {
+ final SingleProofreadingError[] errorArray = errorList.toArray(new SingleProofreadingError[errorList.size()]);
+ Arrays.sort(errorArray, new ErrorPositionComparator());
+ return errorArray;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Creates a SingleGrammarError object for use in OOo.
+ * @param myMatch
+ * ruleMatch - LT rule match
+ *
+ * @return SingleGrammarError - object for OOo checker integration
+ */
+ private SingleProofreadingError createOOoError(final RuleMatch myMatch,
+ final int startIndex) {
+ final SingleProofreadingError aError = new SingleProofreadingError();
+ aError.nErrorType = com.sun.star.text.TextMarkupType.PROOFREADING;
+ // the API currently has no support for formatting text in comments
+ final String comment = myMatch.getMessage()
+ .replaceAll("<suggestion>", "\"").replaceAll("</suggestion>", "\"")
+ .replaceAll("([\r]*\n)", " "); // convert line ends to spaces
+ aError.aFullComment = comment;
+ // not all rules have short comments
+ if (!StringTools.isEmpty(myMatch.getShortMessage())) {
+ aError.aShortComment = myMatch.getShortMessage();
+ } else {
+ aError.aShortComment = aError.aFullComment;
+ }
+ aError.aSuggestions = myMatch.getSuggestedReplacements().toArray(
+ new String[myMatch.getSuggestedReplacements().size()]);
+ aError.nErrorStart = myMatch.getFromPos() + startIndex;
+ aError.nErrorLength = myMatch.getToPos() - myMatch.getFromPos();
+ aError.aRuleIdentifier = myMatch.getRule().getId();
+ aError.aProperties = new PropertyValue[0];
+ return aError;
+ }
+
+ /**
+  * Tells the caller that this proofreader is not a spell checker, as LT
+  * does not support spell-checking.
+  *
+  * @return always false
+  */
+ public final boolean isSpellChecker() {
+   return false;
+ }
+
+ /**
+  * Opens the LT options dialog on a separate thread for the language under
+  * the cursor. Does nothing if that language could not be determined.
+  */
+ public final void runOptionsDialog() {
+   final Language cursorLanguage = getLanguage();
+   if (cursorLanguage != null) {
+     new ConfigThread(cursorLanguage, config, this).start();
+   }
+ }
+
+ /**
+  * Builds one locale per (language, country variant) pair of all languages
+  * in Language.LANGUAGES, in iteration order.
+  *
+  * @return An array of Locales supported by LT; an empty array if an error
+  *         occurred (the error is reported through showError).
+  */
+ public final Locale[] getLocales() {
+   try {
+     final List<Locale> supported = new ArrayList<Locale>();
+     for (final Language language : Language.LANGUAGES) {
+       for (final String country : language.getCountryVariants()) {
+         supported.add(new Locale(language.getShortName(), country, ""));
+       }
+     }
+     return supported.toArray(new Locale[supported.size()]);
+   } catch (final Throwable problem) {
+     showError(problem);
+     return new Locale[0];
+   }
+ }
+
+ /**
+ * @return true if LT supports the language of a given locale.
+ * @param locale
+ * The Locale to check.
+ */
+ public final boolean hasLocale(final Locale locale) {
+ try {
+ for (final Language element : Language.LANGUAGES) {
+ if (element.getShortName().equals(locale.Language)) {
+ return true;
+ }
+ }
+ } catch (final Throwable t) {
+ showError(t);
+ }
+ return false;
+ }
+
+ /**
+  * Registers a listener that allows re-checking the document after the
+  * options were changed in the configuration dialog box.
+  *
+  * @param xLinEvLis
+  *          - the listener to be added
+  * @return true if listener is non-null and has been added, false otherwise.
+  */
+ public final boolean addLinguServiceEventListener(
+     final XLinguServiceEventListener xLinEvLis) {
+   if (xLinEvLis != null) {
+     xEventListeners.add(xLinEvLis);
+     return true;
+   }
+   return false;
+ }
+
+ /**
+  * Unregisters a listener from the event listeners list.
+  *
+  * @param xLinEvLis
+  *          - the listener to be removed
+  * @return true if listener is non-null and has been removed, false otherwise.
+  */
+ public final boolean removeLinguServiceEventListener(
+     final XLinguServiceEventListener xLinEvLis) {
+   // List.remove(Object) reports whether the element was present
+   return xLinEvLis != null && xEventListeners.remove(xLinEvLis);
+ }
+
+ /**
+  * Inform listener (grammar checking iterator) that options have changed and
+  * the doc should be rechecked.
+  *
+  * Does nothing if no listener is registered. Otherwise the recheck flag and
+  * the disabled-rules set are refreshed from the configuration first, and
+  * only then is every non-null listener sent a PROOFREAD_AGAIN event.
+  */
+ public final void resetDocument() {
+   if (xEventListeners.isEmpty()) {
+     return;
+   }
+   // Update our own state before notifying: a listener may start
+   // proofreading as soon as it receives the event, and that check must
+   // already see the recheck request instead of the stale cached checker.
+   recheck = true;
+   disabledRules = config.getDisabledRuleIds();
+   if (disabledRules == null) {
+     disabledRules = new HashSet<String>();
+   }
+   for (final XLinguServiceEventListener xEvLis : xEventListeners) {
+     if (xEvLis != null) {
+       final com.sun.star.linguistic2.LinguServiceEvent xEvent = new com.sun.star.linguistic2.LinguServiceEvent();
+       xEvent.nEvent = com.sun.star.linguistic2.LinguServiceEventFlags.PROOFREAD_AGAIN;
+       xEvLis.processLinguServiceEvent(xEvent);
+     }
+   }
+ }
+
+ /** @return the service names, as provided by the static getServiceNames() */
+ public String[] getSupportedServiceNames() {
+   final String[] names = getServiceNames();
+   return names;
+ }
+
+ /** @return the shared SERVICE_NAMES array itself (not a copy) */
+ public static String[] getServiceNames() {
+   return Main.SERVICE_NAMES;
+ }
+
+ /**
+  * Checks whether the given name is one of the entries in SERVICE_NAMES.
+  *
+  * @param sServiceName the service name to look up; a null name is simply
+  *        not supported
+  * @return true if the name equals one of the service names, false otherwise
+  */
+ public boolean supportsService(final String sServiceName) {
+   for (final String sName : SERVICE_NAMES) {
+     // compare from the constant's side so a null argument yields false
+     // instead of a NullPointerException
+     if (sName.equals(sServiceName)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /** @return the fully qualified name of this class */
+ public String getImplementationName() {
+   final String implementationName = Main.class.getName();
+   return implementationName;
+ }
+
+ public static XSingleComponentFactory __getComponentFactory(
+ final String sImplName) {
+ SingletonFactory xFactory = null;
+ if (sImplName.equals(Main.class.getName())) {
+ xFactory = new SingletonFactory();
+ }
+ return xFactory;
+ }
+
+ /**
+  * Passes this class's name and service names to
+  * Factory.writeRegistryServiceInfo for the given registry key.
+  *
+  * @param regKey the registry key handed to the factory helper
+  * @return the result of Factory.writeRegistryServiceInfo
+  */
+ public static boolean __writeRegistryServiceInfo(final XRegistryKey regKey) {
+   final String implementationName = Main.class.getName();
+   final String[] serviceNames = Main.getServiceNames();
+   return Factory.writeRegistryServiceInfo(implementationName, serviceNames,
+       regKey);
+ }
+
+ /**
+  * Handles a job event: "configure" opens the options dialog, "about"
+  * starts the about dialog thread, anything else is logged to System.err.
+  * Nothing happens if the Java version check fails. Any Throwable is
+  * reported through showError.
+  *
+  * @param sEvent the event name
+  */
+ public void trigger(final String sEvent) {
+   if (!javaVersionOkay()) {
+     return;
+   }
+   try {
+     if ("configure".equals(sEvent)) {
+       runOptionsDialog();
+       return;
+     }
+     if ("about".equals(sEvent)) {
+       new AboutDialogThread(MESSAGES).start();
+       return;
+     }
+     System.err.println("Sorry, don't know what to do, sEvent = " + sEvent);
+   } catch (final Throwable problem) {
+     showError(problem);
+   }
+ }
+
+ /**
+  * Checks the "java.version" system property. For versions starting with
+  * 1.0 to 1.4 an error dialog is started and false is returned. Otherwise
+  * the Nimbus look and feel is activated when it is installed (failures are
+  * ignored) and true is returned.
+  */
+ private boolean javaVersionOkay() {
+   final String version = System.getProperty("java.version");
+   final String[] unsupportedPrefixes = {"1.0", "1.1", "1.2", "1.3", "1.4"};
+   boolean tooOld = false;
+   if (version != null) {
+     for (final String prefix : unsupportedPrefixes) {
+       if (version.startsWith(prefix)) {
+         tooOld = true;
+         break;
+       }
+     }
+   }
+   if (tooOld) {
+     new DialogThread(
+         "Error: LanguageTool requires Java 1.5 or later. Current version: "
+             + version).start();
+     return false;
+   }
+   try {
+     final UIManager.LookAndFeelInfo[] installed = UIManager.getInstalledLookAndFeels();
+     for (int i = 0; i < installed.length; i++) {
+       if ("Nimbus".equals(installed[i].getName())) {
+         UIManager.setLookAndFeel(installed[i].getClassName());
+         break;
+       }
+     }
+   } catch (Exception ex) {
+     // Well, what can we do...
+   }
+
+   return true;
+ }
+
+ static void showError(final Throwable e) {
+ final String metaInfo = "OS: " + System.getProperty("os.name")
+ + " on " + System.getProperty("os.arch") + ", Java version "
+ + System.getProperty("java.vm.version")
+ + " from " + System.getProperty("java.vm.vendor");
+ String msg = "An error has occurred in LanguageTool " + JLanguageTool.VERSION + ":\n" + e.toString()
+ + "\nStacktrace:\n";
+ final StackTraceElement[] elem = e.getStackTrace();
+ for (final StackTraceElement element : elem) {
+ msg += element.toString() + "\n";
+ }
+ msg += metaInfo;
+ final DialogThread dt = new DialogThread(msg);
+ dt.start();
+ // e.printStackTrace();
+ // OOo crashes when we throw an Exception :-(
+ // throw new RuntimeException(e);
+ }
+
+ /**
+  * Returns the user's home directory as given by the "user.home" system
+  * property.
+  *
+  * If the property is not set, the problem is shown via showError and the
+  * same descriptive RuntimeException is then thrown. Previously execution
+  * fell through to the File constructor with a null path, which failed with
+  * an uninformative NullPointerException right after the dialog.
+  *
+  * @return the home directory, never null
+  * @throws RuntimeException if the "user.home" property is not set
+  */
+ private File getHomeDir() {
+   final String homeDir = System.getProperty("user.home");
+   if (homeDir == null) {
+     final RuntimeException ex = new RuntimeException("Could not get home directory");
+     showError(ex);
+     throw ex;
+   }
+   return new File(homeDir);
+ }
+
+ /**
+  * Thread that shows the OOoAboutDialog on top of the container window of
+  * the current document's frame.
+  */
+ private class AboutDialogThread extends Thread {
+
+   private final ResourceBundle messages;
+
+   AboutDialogThread(final ResourceBundle messages) {
+     this.messages = messages;
+   }
+
+   @Override
+   public void run() {
+     final XModel docModel = (XModel) UnoRuntime.queryInterface(XModel.class,
+         getxComponent());
+     final XWindow containerWindow = docModel.getCurrentController().getFrame()
+         .getContainerWindow();
+     final XWindowPeer peer = (XWindowPeer) UnoRuntime
+         .queryInterface(XWindowPeer.class, containerWindow);
+     new OOoAboutDialog(messages, peer).show();
+   }
+ }
+
+ /**
+ * Adds the given rule id to the UI-level collection of disabled rules,
+ * stores that collection in the configuration, saves the configuration and
+ * flags that a recheck is needed. A failure while saving is reported via
+ * showError and not rethrown.
+ *
+ * @param ruleId id of the rule to ignore
+ * @param locale not used by this method yet, see the TODO below
+ */
+ public void ignoreRule(final String ruleId, final Locale locale)
+ throws IllegalArgumentException {
+ // TODO: config should be locale-dependent
+ disabledRulesUI.add(ruleId);
+ config.setDisabledRuleIds(disabledRulesUI);
+ try {
+ config.saveConfiguration();
+ } catch (final Throwable t) {
+ showError(t);
+ }
+ recheck = true;
+ }
+
+ /**
+ * Called on rechecking the document - resets the ignore status of rules that
+ * were ignored in the spelling dialog box or in the context menu.
+ *
+ * The rules disabled in the config dialog box are left intact: the
+ * configuration is reset to the {@code disabledRules} set and saved. A
+ * failure while saving is reported via showError and not rethrown. Finally
+ * the recheck flag is set.
+ */
+ public void resetIgnoreRules() {
+ config.setDisabledRuleIds(disabledRules);
+ try {
+ config.saveConfiguration();
+ } catch (final Throwable t) {
+ showError(t);
+ }
+ recheck = true;
+ }
+
+ /**
+ * Returns the display name of this service, which is the fixed string
+ * "LanguageTool" regardless of the given locale.
+ *
+ * @param locale ignored
+ */
+ public String getServiceDisplayName(Locale locale) {
+ return "LanguageTool";
+ }
+
+}
+
+/**
+ * A simple comparator for sorting errors by their position.
+ *
+ */
+class ErrorPositionComparator implements Comparator<SingleProofreadingError> {
+
+ public int compare(final SingleProofreadingError match1,
+ final SingleProofreadingError match2) {
+ if (match1.aSuggestions.length == 0
+ && match2.aSuggestions.length > 0) {
+ return 1;
+ }
+ if (match2.aSuggestions.length == 0
+ && match1.aSuggestions.length > 0) {
+ return -1;
+ }
+ final int error1pos = match1.nErrorStart;
+ final int error2pos = match2.nErrorStart;
+ if (error1pos > error2pos)
+ return 1;
+ else if (error1pos < error2pos)
+ return -1;
+ else
+ if (match1.aSuggestions.length != 0
+ && match2.aSuggestions.length != 0
+ && match1.aSuggestions.length
+ != match2.aSuggestions.length) {
+ return ((Integer) (match1.aSuggestions.length))
+ .compareTo(match2.aSuggestions.length);
+ }
+ return match1.aRuleIdentifier.compareTo(match2.aRuleIdentifier);
+ }
+}
+
+/**
+ * Thread that displays the given text in a message dialog without a parent
+ * component.
+ */
+class DialogThread extends Thread {
+  private final String text;
+
+  DialogThread(final String text) {
+    super();
+    this.text = text;
+  }
+
+  @Override
+  public void run() {
+    JOptionPane.showMessageDialog(null, this.text);
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/OOoAboutDialog.java b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/OOoAboutDialog.java
new file mode 100644
index 0000000..35fbb2c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/OOoAboutDialog.java
@@ -0,0 +1,64 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.openoffice;
+
+import com.sun.star.awt.Rectangle;
+import com.sun.star.awt.XMessageBox;
+import com.sun.star.awt.XMessageBoxFactory;
+import com.sun.star.awt.XWindowPeer;
+import com.sun.star.uno.UnoRuntime;
+import de.danielnaber.languagetool.gui.AboutDialog;
+import de.danielnaber.languagetool.tools.StringTools;
+
+import java.util.ResourceBundle;
+
+/**
+ * Dialog that displays version and copyright information.
+ *
+ * @author Marcin Miłkowski
+ */
+public class OOoAboutDialog extends AboutDialog {
+
+ private final XWindowPeer winPeer;
+
+ /**
+ * @param messages resource bundle passed on to the AboutDialog superclass
+ * @param parentWindowPeer window peer that show() uses both to obtain the
+ * toolkit and as parent of the message box
+ */
+ public OOoAboutDialog(final ResourceBundle messages,
+ final XWindowPeer parentWindowPeer) {
+ super(messages);
+ winPeer = parentWindowPeer;
+ }
+
+ /**
+  * Shows the about text in an "infobox" message box created through the
+  * toolkit of the parent window peer. The title is the label derived from
+  * the "guiMenuAbout" message.
+  */
+ @Override
+ public void show() {
+   final String title = StringTools.getLabel(messages.getString("guiMenuAbout"));
+   final XMessageBoxFactory factory = (XMessageBoxFactory) UnoRuntime
+       .queryInterface(XMessageBoxFactory.class, winPeer.getToolkit());
+   final Rectangle bounds = new Rectangle();
+   final XMessageBox aboutBox = factory.createMessageBox(
+       winPeer, bounds, "infobox", 0, title, getAboutText());
+   aboutBox.execute();
+ }
+
+} \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/SingletonFactory.java b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/SingletonFactory.java
new file mode 100644
index 0000000..ba43cbf
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/openoffice/SingletonFactory.java
@@ -0,0 +1,48 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.openoffice;
+
+import com.sun.star.lang.XSingleComponentFactory;
+import com.sun.star.uno.XComponentContext;
+
+/**
+ * Factory that hands out one shared instance (a singleton) of the Main
+ * class: the instance is created on the first request, later requests only
+ * pass the new component context to it. Used for performance reasons and to
+ * allow various parts of code to interact.
+ *
+ * @author Marcin Miłkowski
+ */
+public class SingletonFactory implements XSingleComponentFactory {
+
+  private transient de.danielnaber.languagetool.openoffice.Main instance;
+
+  /** The arguments are ignored; see {@link #createInstanceWithContext}. */
+  public final Object createInstanceWithArgumentsAndContext(final Object[] arguments,
+      final XComponentContext xContext) throws com.sun.star.uno.Exception {
+    return createInstanceWithContext(xContext);
+  }
+
+  /**
+   * Returns the shared Main instance, creating it with the given context if
+   * it does not exist yet and otherwise switching it to the given context.
+   */
+  public final Object createInstanceWithContext(final XComponentContext xContext) throws com.sun.star.uno.Exception {
+    if (instance != null) {
+      instance.changeContext(xContext);
+      return instance;
+    }
+    instance = new de.danielnaber.languagetool.openoffice.Main(xContext);
+    return instance;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractCompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractCompoundRule.java
new file mode 100644
index 0000000..8ef9119
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractCompoundRule.java
@@ -0,0 +1,279 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.ResourceBundle;
+import java.util.Set;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Checks that compounds (if in the list) are not written as separate words.
+ *
+ * @author Daniel Naber & Marcin Miłkowski (refactoring)
+ */
+
+public abstract class AbstractCompoundRule extends Rule {
+
+ private static final int MAX_TERMS = 5;
+
+ private final Set<String> incorrectCompounds = new HashSet<String>();
+ private final Set<String> noDashSuggestion = new HashSet<String>();
+ private final Set<String> onlyDashSuggestion = new HashSet<String>();
+
+ private String withHyphenMessage;
+ private String asOneMessage;
+ private String withOrWithoutHyphenMessage;
+
+ private String shortDesc;
+
+ /** Compounds with more than maxUnHyphenatedWordCount parts should always use hyphens */
+ private int maxUnHyphenatedWordCount = 2;
+
+ /** Flag to indicate if the hyphen is ignored in the text entered by the user.
+ * Set this to false if you want the rule to offer suggestions for words like [ro] "câte-și-trei" (with hyphen), not only for "câte și trei" (with spaces).
+ * This is only available for languages with hyphen as a word separator (i.e. not available for English, available for Romanian).
+ * See Language.getWordTokenizer()
+ */
+ private boolean hyphenIgnored = true;
+
+ /**
+ * Sets the category of this rule to the "category_misc" message if a
+ * resource bundle is given; with a null bundle no category is set here.
+ * NOTE(review): nothing in this constructor body throws IOException -
+ * presumably it is declared for subclasses that load the compound file in
+ * their own constructor; confirm.
+ *
+ * @param messages resource bundle with the category name, may be null
+ */
+ public AbstractCompoundRule(final ResourceBundle messages) throws IOException {
+ if (messages != null)
+ super.setCategory(new Category(messages.getString("category_misc")));
+ }
+
+ /** Returns the id of the concrete compound rule; supplied by the subclass. */
+ public abstract String getId();
+
+ /** Returns the description of the concrete compound rule; supplied by the subclass. */
+ public abstract String getDescription();
+
+ /**
+ * Sets the short description that match() passes to every RuleMatch it
+ * creates.
+ */
+ public void setShort(final String shortDescription) {
+ shortDesc = shortDescription;
+ }
+
+ /**
+ * Sets the messages used by match().
+ *
+ * @param withHyphenMessage used when only the hyphenated spelling is suggested
+ * @param asOneMessage used when only the merged (one word) spelling is suggested
+ * @param withHyphenOrNotMessage used when both spellings (or, unexpectedly,
+ * none) are suggested
+ */
+ public void setMsg(final String withHyphenMessage, final String asOneMessage, final String withHyphenOrNotMessage) {
+ this.withHyphenMessage = withHyphenMessage;
+ this.asOneMessage = asOneMessage;
+ withOrWithoutHyphenMessage = withHyphenOrNotMessage;
+ }
+
+ /**
+ * Returns whether hyphens in the checked text are ignored, i.e. replaced by
+ * spaces when candidate strings are normalized. Defaults to true.
+ */
+ public boolean isHyphenIgnored() {
+ return hyphenIgnored;
+ }
+
+ /**
+ * Sets whether hyphens in the checked text are ignored.
+ *
+ * @param ignoreHyphen new value returned by {@link #isHyphenIgnored()}
+ */
+ public void setHyphenIgnored(boolean ignoreHyphen) {
+ this.hyphenIgnored = ignoreHyphen;
+ }
+
+ /**
+ * Returns the maximum number of parts a compound may have for match() to
+ * still suggest the merged (one word) spelling. Defaults to 2.
+ */
+ public int getMaxUnHyphenatedWordCount() {
+ return maxUnHyphenatedWordCount;
+ }
+
+ /**
+ * Sets the maximum number of parts for which the merged (one word) spelling
+ * is still suggested.
+ *
+ * @param maxNoHyphensSize new value returned by
+ * {@link #getMaxUnHyphenatedWordCount()}
+ */
+ public void setMaxUnHyphenatedWordCount(int maxNoHyphensSize) {
+ this.maxUnHyphenatedWordCount = maxNoHyphensSize;
+ }
+
+ /**
+ * Finds sequences of 2 to MAX_TERMS consecutive tokens whose normalized,
+ * space-joined text is listed in the set of incorrect compounds, and
+ * suggests the hyphenated and/or merged spelling for them.
+ *
+ * A window of the most recent tokens (at most MAX_TERMS) is kept in a
+ * queue. In every iteration the window collected so far is checked, longest
+ * candidate first, and only afterwards the current token is added.
+ *
+ * @param text the sentence to check
+ * @return the matches found, at most one per start position of a window
+ */
+ public RuleMatch[] match(final AnalyzedSentence text) {
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+ RuleMatch prevRuleMatch = null;
+ final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<AnalyzedTokenReadings>(MAX_TERMS);
+ for (int i = 0; i < tokens.length + MAX_TERMS-1; i++) {
+ AnalyzedTokenReadings token = null;
+ // we need to extend the token list so we find matches at the end of the original list:
+ // NOTE(review): peek() returns null on an empty queue, so this relies on
+ // the sentence having at least one token - confirm.
+ if (i >= tokens.length)
+ token = new AnalyzedTokenReadings(new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
+ else
+ token = tokens[i];
+ // the very first token only fills the window, there is nothing to check yet
+ if (i == 0) {
+ addToQueue(token, prevTokens);
+ continue;
+ }
+
+ final StringBuilder sb = new StringBuilder();
+ int j = 0;
+ AnalyzedTokenReadings firstMatchToken = null;
+ final List<String> stringsToCheck = new ArrayList<String>();
+ final List<String> origStringsToCheck = new ArrayList<String>(); // original upper/lowercase spelling
+ // maps each normalized candidate to its last token (needed for the end position)
+ final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<String, AnalyzedTokenReadings>();
+ // build the candidates: the first 2, 3, ... tokens of the window joined by spaces
+ for (AnalyzedTokenReadings atr : prevTokens) {
+ if (j == 0)
+ firstMatchToken = atr;
+ sb.append(' ');
+ sb.append(atr.getToken());
+ if (j >= 1) {
+ final String stringToCheck = normalize(sb.toString());
+ stringsToCheck.add(stringToCheck);
+ origStringsToCheck.add(sb.toString().trim());
+ // keep the first token recorded for a candidate if the same string occurs again
+ if (!stringToToken.containsKey(stringToCheck))
+ stringToToken.put(stringToCheck, atr);
+ }
+ j++;
+ }
+ // iterate backwards over all potentially incorrect strings to make
+ // sure we match longer strings first:
+ for (int k = stringsToCheck.size()-1; k >= 0; k--) {
+ final String stringToCheck = stringsToCheck.get(k);
+ final String origStringToCheck = origStringsToCheck.get(k);
+ if (incorrectCompounds.contains(stringToCheck)) {
+ final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
+ String msg = null;
+ final List<String> replacement = new ArrayList<String>();
+ // suggestion 1: hyphenated spelling, unless the compound file excluded it
+ if (!noDashSuggestion.contains(stringToCheck)) {
+ replacement.add(origStringToCheck.replace(' ', '-'));
+ msg = withHyphenMessage;
+ }
+ // assume that compounds with more than maxUnHyphenatedWordCount (default: two) parts should always use hyphens:
+ if (!hasAllUppercaseParts(origStringToCheck) && countParts(stringToCheck) <= getMaxUnHyphenatedWordCount()
+ && !onlyDashSuggestion.contains(stringToCheck)) {
+ replacement.add(mergeCompound(origStringToCheck));
+ msg = asOneMessage;
+ }
+ final String[] parts = stringToCheck.split(" ");
+ // a one-letter first part always gets the hyphenated suggestion only
+ if (parts.length > 0 && parts[0].length() == 1) {
+ replacement.clear();
+ replacement.add(origStringToCheck.replace(' ', '-'));
+ msg = withHyphenMessage;
+ } else if (replacement.isEmpty() || replacement.size() == 2) { // isEmpty shouldn't happen
+ msg = withOrWithoutHyphenMessage;
+ }
+ final RuleMatch ruleMatch = new RuleMatch(this, firstMatchToken.getStartPos(),
+ atr.getStartPos() + atr.getToken().length(), msg, shortDesc);
+ // avoid duplicate matches:
+ // (a match starting where the previous one started is dropped, but still
+ // remembered as the previous match)
+ if (prevRuleMatch != null && prevRuleMatch.getFromPos() == ruleMatch.getFromPos()) {
+ prevRuleMatch = ruleMatch;
+ break;
+ }
+ prevRuleMatch = ruleMatch;
+ ruleMatch.setSuggestedReplacements(replacement);
+ ruleMatches.add(ruleMatch);
+ break;
+ }
+ }
+ // slide the window: add the current token, evicting the oldest one if needed
+ addToQueue(token, prevTokens);
+ }
+ return toRuleMatchArray(ruleMatches);
+ }
+
+ private String normalize(final String inStr) {
+ String str = inStr.trim().toLowerCase();
+ if (str.indexOf('-') != -1 && str.indexOf(' ') != -1) {
+ if (isHyphenIgnored()) {
+ // e.g. "E-Mail Adresse" -> "E Mail Adresse" so the error can be detected:
+ str = str.replace('-', ' ');
+ } else {
+ str = str.replace(" - ", " ");
+ }
+ }
+ return str;
+ }
+
+ /**
+  * Returns true as soon as at least one space-separated part of the string
+  * is written entirely in uppercase (despite the name, not all parts need
+  * to be). If hyphens are not ignored, a lone "-" part is skipped so that it
+  * is not treated as an upper-case word.
+  */
+ private boolean hasAllUppercaseParts(final String str) {
+   for (final String part : str.split(" ")) {
+     final boolean isSkippedHyphen = !isHyphenIgnored() && "-".equals(part);
+     if (!isSkippedHyphen && StringTools.isAllUppercase(part)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /** Returns the number of parts obtained by splitting the string at single spaces. */
+ private int countParts(final String str) {
+ return str.split(" ").length;
+ }
+
+ /**
+  * Joins the space-separated parts of the string into one word. The first
+  * part keeps its spelling, all following parts are lowercased. If hyphens
+  * are not ignored, lone "-" parts are left out.
+  */
+ private String mergeCompound(final String str) {
+   final StringBuilder merged = new StringBuilder();
+   int index = 0;
+   for (final String part : str.split(" ")) {
+     // position counts every part, including skipped hyphens
+     final boolean isFirst = index++ == 0;
+     if (!isHyphenIgnored() && "-".equals(part)) {
+       continue;
+     }
+     merged.append(isFirst ? part : part.toLowerCase());
+   }
+   return merged.toString();
+ }
+
+ /**
+  * Appends the token to the queue. If the queue refuses the token, the
+  * head (oldest element) is removed first and the token is offered again.
+  */
+ private void addToQueue(final AnalyzedTokenReadings token, final Queue<AnalyzedTokenReadings> prevTokens) {
+   if (prevTokens.offer(token)) {
+     return;
+   }
+   prevTokens.poll();
+   prevTokens.offer(token);
+ }
+
+ /**
+  * Reads the compound list from the given stream, one entry per line.
+  * Empty lines and lines starting with '#' are skipped. Hyphens are turned
+  * into spaces and the lowercased entry is stored as an incorrect spelling.
+  * A trailing "+" additionally marks the entry as "no hyphen suggestion", a
+  * trailing "*" as "only hyphen suggestion"; the marker itself is cut off.
+  *
+  * @param file stream to read from; wrapped in readers that are closed here
+  * @param encoding character encoding of the stream
+  * @throws IOException on read errors, if an entry has more than MAX_TERMS
+  *         parts or if it consists of a single part only
+  */
+ public void loadCompoundFile(final InputStream file, final String encoding) throws IOException {
+   InputStreamReader streamReader = null;
+   BufferedReader lineReader = null;
+   try {
+     streamReader = new InputStreamReader(file, encoding);
+     lineReader = new BufferedReader(streamReader);
+     for (String raw = lineReader.readLine(); raw != null; raw = lineReader.readLine()) {
+       final String trimmed = raw.trim();
+       // skip empty lines and comments
+       if (trimmed.length() < 1 || trimmed.charAt(0) == '#') {
+         continue;
+       }
+       // the set contains the incorrect spellings, i.e. the ones without hyphen
+       String entry = trimmed.replace('-', ' ');
+       final int partCount = entry.split(" ").length;
+       if (partCount > MAX_TERMS) {
+         throw new IOException("Too many compound parts: " + entry + ", maximum allowed: " + MAX_TERMS);
+       }
+       if (partCount == 1) {
+         throw new IOException("Not a compound: " + entry);
+       }
+       Set<String> markerSet = null;
+       if (entry.endsWith("+")) {
+         markerSet = noDashSuggestion;
+       } else if (entry.endsWith("*")) {
+         markerSet = onlyDashSuggestion;
+       }
+       if (markerSet != null) {
+         entry = entry.substring(0, entry.length() - 1); // cut off the marker
+       }
+       final String key = entry.toLowerCase();
+       if (markerSet != null) {
+         markerSet.add(key);
+       }
+       incorrectCompounds.add(key);
+     }
+   } finally {
+     if (lineReader != null) {
+       lineReader.close();
+     }
+     if (streamReader != null) {
+       streamReader.close();
+     }
+   }
+ }
+
+ /** Does nothing: this rule has nothing to reset. */
+ public void reset() {
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractPunctuationCheckRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractPunctuationCheckRule.java
new file mode 100644
index 0000000..89d216b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractPunctuationCheckRule.java
@@ -0,0 +1,93 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+
+/**
+ * A rule that matches "..", "::", "-," but not "...", "!..", "?!!", ",-" etc.
+ * Languages will have to subclass it and override <code>isPunctsJoinOk()</code>
+ * and <code>isPunctuation()</code> to provide language-specific checking
+ *
+ * @author Andriy Rysin
+ */
+public abstract class AbstractPunctuationCheckRule extends Rule {
+
+ /**
+ * Passes the bundle to the superclass and sets the category of this rule
+ * to the "category_misc" message.
+ *
+ * @param messages resource bundle; must not be null because the category
+ * name is read from it unconditionally
+ */
+ public AbstractPunctuationCheckRule(final ResourceBundle messages) {
+ super(messages);
+ super.setCategory(new Category(messages.getString("category_misc")));
+ }
+
+ /** Returns the fixed rule id "PUNCTUATION_GENERIC_CHECK". */
+ public String getId() {
+ return "PUNCTUATION_GENERIC_CHECK";
+ }
+
+ /** Returns the (hard-coded, not localized) description of this rule. */
+ public String getDescription() {
+ return "Use of unusual combination of punctuation characters";
+ }
+
+ /**
+ * Decides whether a run of consecutive punctuation tokens is acceptable.
+ *
+ * @param tkns the concatenated punctuation tokens; match() only asks for
+ * runs of at least two characters
+ * @return true if the combination is fine, false if it should be reported
+ */
+ protected abstract boolean isPunctsJoinOk(String tkns);
+
+ /**
+ * Decides whether the given token text counts as punctuation and therefore
+ * extends the current punctuation run in match().
+ */
+ protected abstract boolean isPunctuation(String token);
+
+ /**
+  * Collects runs of consecutive punctuation tokens and reports every run of
+  * at least two characters that isPunctsJoinOk() rejects. The suggested
+  * replacement is the first character of the run.
+  *
+  * A run ends at the first non-punctuation token or at the last token of
+  * the sentence.
+  *
+  * @param text the sentence to check
+  * @return the matches found
+  */
+ public RuleMatch[] match(final AnalyzedSentence text) {
+   final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+   final AnalyzedTokenReadings[] tokens = text.getTokens();
+   final int lastIdx = tokens.length - 1;
+
+   int runStartIdx = -1;
+   final StringBuilder run = new StringBuilder();
+   for (int i = 0; i < tokens.length; i++) {
+     final String tokenStr = tokens[i].getToken();
+
+     if (isPunctuation(tokenStr)) {
+       run.append(tokenStr);
+       if (runStartIdx == -1) {
+         runStartIdx = i;
+       }
+       // keep collecting unless this is the last token of the sentence
+       if (i < lastIdx) {
+         continue;
+       }
+     }
+
+     final String tkns = run.toString();
+     if (tkns.length() >= 2 && !isPunctsJoinOk(tkns)) {
+       final String msg = "bad duplication or combination of punctuation signs";
+       final int fromPos = tokens[runStartIdx].getStartPos();
+       final RuleMatch ruleMatch = new RuleMatch(this, fromPos,
+           fromPos + tkns.length(), msg, "Punctuation problem");
+       ruleMatch.setSuggestedReplacement(tkns.substring(0, 1));
+       ruleMatches.add(ruleMatch);
+     }
+     // the run is finished (reported or not): start over
+     run.setLength(0);
+     runStartIdx = -1;
+   }
+
+   return toRuleMatchArray(ruleMatches);
+ }
+
+ /** Does nothing: this rule has nothing to reset. */
+ public void reset() {
+ // nothing
+ }
+
+} \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractSimpleReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractSimpleReplaceRule.java
new file mode 100644
index 0000000..13288a2
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/AbstractSimpleReplaceRule.java
@@ -0,0 +1,159 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A rule that matches words or phrases which should not be used and suggests
+ * correct ones instead. Loads the relevant words from
+ * <code>rules/XX/replace.txt</code>, where XX is a code of the language.
+ *
+ * @author Andriy Rysin
+ */
+public abstract class AbstractSimpleReplaceRule extends Rule {
+
+ private static final String FILE_ENCODING = "utf-8";
+
+ private Map<String, String> wrongWords; // e.g. "врешті решт" -> "зрештою"
+
+ /**
+ * Returns the name of the replacement file, as passed to the data broker's
+ * rules directory lookup. Note that this is called from the constructor,
+ * so implementations must not depend on subclass fields initialized later.
+ */
+ public abstract String getFileName();
+
+ /**
+ * Returns the character encoding used to read the replacement file; this
+ * implementation returns "utf-8".
+ */
+ public String getEncoding() {
+ return FILE_ENCODING;
+ }
+
+ /**
+ * Indicates if the rule is case-sensitive. Default value is <code>true</code>.
+ * If it is not, match() looks tokens up in lowercase and capitalizes the
+ * suggestion for tokens that start with an uppercase character.
+ * @return true if the rule is case-sensitive, false otherwise.
+ */
+ public boolean isCaseSensitive() {
+ return true;
+ }
+
+ /**
+ * This implementation returns the default locale of the JVM.
+ * @return the locale used for case conversion when {@link #isCaseSensitive()} is set to <code>false</code>.
+ */
+ public Locale getLocale() {
+ return Locale.getDefault();
+ }
+
+ /**
+ * Sets the category to the "category_misc" message (if a bundle is given)
+ * and loads the replacement map from the file named by getFileName(),
+ * obtained from the rules directory through the data broker.
+ *
+ * @param messages resource bundle with the category name, may be null
+ * @throws IOException if the replacement file cannot be read or contains a
+ * malformed line
+ */
+ public AbstractSimpleReplaceRule(final ResourceBundle messages) throws IOException {
+ if (messages != null) {
+ super.setCategory(new Category(messages.getString("category_misc")));
+ }
+ // NOTE(review): calls the overridable getFileName() (and, inside loadWords, getEncoding()) from the constructor
+ wrongWords = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(getFileName()));
+ }
+
+ /** Returns the rule id, "SIMPLE_REPLACE" unless overridden. */
+ public String getId() {
+ return "SIMPLE_REPLACE";
+ }
+
+ /** Returns the (hard-coded English) description of this rule. */
+ public String getDescription() {
+ return "Checks for wrong words/phrases";
+ }
+
+ /**
+ * Returns the text that match() places between the wrong token and its
+ * replacement when building the message.
+ */
+ public String getSuggestion() {
+ return " is not valid, use ";
+ }
+
+ /** Returns the short message that match() passes to every RuleMatch. */
+ public String getShort() {
+ return "Wrong word";
+ }
+
+ /**
+  * Reports every token (starting with the second one, index 1) that has an
+  * entry in the replacement map. For a case-insensitive rule the token is
+  * looked up in lowercase and the suggestion is capitalized if the token
+  * starts with an uppercase character.
+  *
+  * @param text the sentence to check
+  * @return the matches found
+  */
+ public final RuleMatch[] match(final AnalyzedSentence text) {
+   final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+   final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+   for (int i = 1; i < tokens.length; i++) {
+     final String word = tokens[i].getToken();
+     final String lookupKey = isCaseSensitive() ? word : word.toLowerCase(getLocale());
+     final String replacement = wrongWords.get(lookupKey);
+     if (replacement == null) {
+       continue;
+     }
+     final String msg = word + getSuggestion() + replacement;
+     final int from = tokens[i].getStartPos();
+     final RuleMatch ruleMatch = new RuleMatch(this, from, from + word.length(),
+         msg, getShort());
+     final boolean capitalize = !isCaseSensitive() && StringTools.startsWithUppercase(word);
+     ruleMatch.setSuggestedReplacement(
+         capitalize ? StringTools.uppercaseFirstChar(replacement) : replacement);
+     ruleMatches.add(ruleMatch);
+   }
+   return toRuleMatchArray(ruleMatches);
+ }
+
+
+ /**
+  * Reads "wrong=replacement" pairs from the given stream using the encoding
+  * returned by getEncoding(). Empty lines and lines starting with '#' are
+  * skipped.
+  *
+  * @param file stream to read from; wrapped in readers that are closed here
+  * @return map from the wrong word/phrase to its replacement
+  * @throws IOException on read errors or if a line does not split into
+  *         exactly two parts at '='
+  */
+ private Map<String, String> loadWords(final InputStream file) throws IOException {
+   final Map<String, String> replacements = new HashMap<String, String>();
+   InputStreamReader streamReader = null;
+   BufferedReader lineReader = null;
+   try {
+     streamReader = new InputStreamReader(file, getEncoding());
+     lineReader = new BufferedReader(streamReader);
+     for (String raw = lineReader.readLine(); raw != null; raw = lineReader.readLine()) {
+       final String entry = raw.trim();
+       // skip empty lines and comments
+       if (entry.length() < 1 || entry.charAt(0) == '#') {
+         continue;
+       }
+       final String[] pair = entry.split("=");
+       if (pair.length != 2) {
+         throw new IOException("Format error in file "
+             + JLanguageTool.getDataBroker().getFromRulesDirAsUrl(getFileName()) + ", line: " + entry);
+       }
+       replacements.put(pair[0], pair[1]);
+     }
+   } finally {
+     if (lineReader != null) {
+       lineReader.close();
+     }
+     if (streamReader != null) {
+       streamReader.close();
+     }
+   }
+   return replacements;
+ }
+
+ /** Does nothing: this rule has nothing to reset. */
+ public void reset() {
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/Category.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/Category.java
new file mode 100644
index 0000000..95a3b44
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/Category.java
@@ -0,0 +1,85 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+/**
+ * A rule's category: a name plus a priority between 0 and 100. Categories
+ * are used to group rules for a better overview. A category can be flagged
+ * as turned off by default.
+ *
+ * @author Daniel Naber
+ */
+public class Category {
+
+  private static final int DEFAULT_PRIORITY = 50;
+
+  private final String name;
+  private final int priority;
+  private boolean defaultOff;
+
+  /**
+   * Create a new category with the default priority (50).
+   * @param name name of the category
+   */
+  public Category(final String name) {
+    this(name, DEFAULT_PRIORITY);
+  }
+
+  /**
+   * Create a new category with the given name and priority.
+   * @param name name of the category
+   * @param priority a value between 0 and 100 (inclusive)
+   * @throws IllegalArgumentException if the priority is outside that range
+   */
+  public Category(final String name, final int priority) {
+    final boolean inRange = priority >= 0 && priority <= 100;
+    if (!inRange) {
+      throw new IllegalArgumentException("priority must be in range 0 - 100");
+    }
+    this.name = name;
+    this.priority = priority;
+  }
+
+  public String getName() {
+    return this.name;
+  }
+
+  public int getPriority() {
+    return this.priority;
+  }
+
+  /** Returns the name followed by the priority, e.g. <code>Misc(prio=50)</code>. */
+  public String toString() {
+    return name + "(prio=" + priority + ")";
+  }
+
+  /**
+   * Checks whether the category has been turned off
+   * by default by the category author.
+   * @return True if the category is turned off by
+   * default.
+   */
+  public final boolean isDefaultOff() {
+    return this.defaultOff;
+  }
+
+  /**
+   * Turns the category off by default. This cannot be undone.
+   **/
+  public final void setDefaultOff() {
+    this.defaultOff = true;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/CommaWhitespaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/CommaWhitespaceRule.java
new file mode 100644
index 0000000..0636a1f
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/CommaWhitespaceRule.java
@@ -0,0 +1,170 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+
+/**
+ * A rule that matches commas and closing parentheses preceded by whitespace
+ * and opening parentheses followed by whitespace. It also reports a missing
+ * space after a comma and whitespace before a dot.
+ *
+ * @author Daniel Naber
+ */
+public class CommaWhitespaceRule extends Rule {
+
+  public CommaWhitespaceRule(final ResourceBundle messages) {
+    super(messages);
+    super.setCategory(new Category(messages.getString("category_misc")));
+  }
+
+  @Override
+  public final String getId() {
+    return "COMMA_PARENTHESIS_WHITESPACE";
+  }
+
+  @Override
+  public final String getDescription() {
+    return messages.getString("desc_comma_whitespace");
+  }
+
+  /**
+   * Walks over all tokens (including whitespace tokens) and compares each
+   * token with the one or two tokens before it.
+   *
+   * @param text a pre-analyzed sentence
+   * @return one match per whitespace problem found, each carrying a single
+   *         suggested replacement; an empty array if there is none
+   */
+  @Override
+  public final RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokens();
+    String prevToken = "";
+    String prevPrevToken = "";
+    boolean prevWhite = false;
+    int prevLen = 0;
+    for (int i = 0; i < tokens.length; i++) {
+      final String token = tokens[i].getToken();
+      final boolean isWhite = tokens[i].isWhitespace()
+          || tokens[i].isFieldCode();
+      String msg = null;
+      int fixLen = 0;
+      String suggestionText = null;
+      if (isWhite && isLeftBracket(prevToken)) {
+        // whitespace directly after an opening bracket
+        msg = messages.getString("no_space_after");
+        suggestionText = prevToken;
+        fixLen = 1;
+      } else if (!isWhite && prevToken.equals(",")
+          && isNotQuoteOrHyphen(token)
+          && containsNoNumber(prevPrevToken)
+          && containsNoNumber(token)
+          && !",".equals(prevPrevToken)) {
+        // comma directly followed by a word; numbers such as "1,5" and
+        // doubled commas are left alone
+        msg = messages.getString("missing_space_after_comma");
+        suggestionText = ", ";
+      } else if (prevWhite) {
+        if (isRightBracket(token)) {
+          msg = messages.getString("no_space_before");
+          suggestionText = token;
+          fixLen = 1;
+        } else if (token.equals(",")) {
+          msg = messages.getString("space_after_comma");
+          suggestionText = ",";
+          fixLen = 1;
+          // exception for duplicated comma (we already have another rule for that)
+          if (i + 1 < tokens.length
+              && ",".equals(tokens[i + 1].getToken())) {
+            msg = null;
+          }
+        } else if (token.equals(".")) {
+          msg = messages.getString("no_space_before_dot");
+          suggestionText = ".";
+          fixLen = 1;
+          // exception case for figures such as ".5" and ellipsis
+          if (i + 1 < tokens.length
+              && isNumberOrDot(tokens[i + 1].getToken())) {
+            msg = null;
+          }
+        }
+      }
+      if (msg != null) {
+        // every branch above needs a non-empty prevToken or prevWhite == true,
+        // neither of which holds for i == 0, so tokens[i - 1] is always valid
+        final int fromPos = tokens[i - 1].getStartPos();
+        final int toPos = tokens[i - 1].getStartPos() + fixLen + prevLen;
+        // TODO: add some good short comment here
+        final RuleMatch ruleMatch = new RuleMatch(this, fromPos, toPos, msg);
+        ruleMatch.setSuggestedReplacement(suggestionText);
+        ruleMatches.add(ruleMatch);
+      }
+      prevPrevToken = prevToken;
+      prevToken = token;
+      // an OOo field code before a comma/dot does not count as whitespace
+      prevWhite = isWhite && !tokens[i].isFieldCode();
+      prevLen = tokens[i].getToken().length();
+    }
+
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * @return false for a single quote character, hyphen or comma, for the
+   *         string <code>&amp;quot</code> and for longer strings that
+   *         contain a digit; true otherwise
+   */
+  static boolean isNotQuoteOrHyphen(final String str) {
+    if (str.length() == 1) {
+      final char c = str.charAt(0);
+      if (c =='\'' || c == '-' || c == '”'
+          || c =='’' || c == '"' || c == '“'
+          || c == ',') {
+        return false;
+      }
+    } else {
+      if ("&quot".equals(str)) {
+        return false;
+      }
+      return containsNoNumber(str);
+    }
+    return true;
+  }
+
+  /**
+   * @return true if the string starts with a dot or a digit; false for the
+   *         empty string
+   */
+  static boolean isNumberOrDot(final String str) {
+    // guard against empty tokens, as isLeftBracket/isRightBracket do
+    if (str.length() == 0) {
+      return false;
+    }
+    final char c = str.charAt(0);
+    return (c == '.' || Character.isDigit(c));
+  }
+
+  /**
+   * @return true if the string starts with <code>(</code>, <code>[</code>
+   *         or <code>{</code>
+   */
+  static boolean isLeftBracket(final String str) {
+    if (str.length() == 0) {
+      return false;
+    }
+    final char c = str.charAt(0);
+    return (c == '(' || c == '[' || c == '{');
+  }
+
+  /**
+   * @return true if the string starts with <code>)</code>, <code>]</code>
+   *         or <code>}</code>
+   */
+  static boolean isRightBracket(final String str) {
+    if (str.length() == 0) {
+      return false;
+    }
+    final char c = str.charAt(0);
+    return (c == ')' || c == ']' || c == '}');
+  }
+
+  /**
+   * @return true if no character of the string is a digit (also true for
+   *         the empty string)
+   */
+  static boolean containsNoNumber(final String str) {
+    for (int i = 0; i < str.length(); i++) {
+      if (Character.isDigit(str.charAt(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  @Override
+  public void reset() {
+    // nothing
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/DoublePunctuationRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/DoublePunctuationRule.java
new file mode 100644
index 0000000..3a6a4e1
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/DoublePunctuationRule.java
@@ -0,0 +1,99 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+
+/**
+ * A rule that matches ".." (but not "..." etc) and ",,".
+ *
+ * @author Daniel Naber
+ */
+public class DoublePunctuationRule extends Rule {
+
+  public DoublePunctuationRule(final ResourceBundle messages) {
+    super(messages);
+    super.setCategory(new Category(messages.getString("category_misc")));
+  }
+
+  @Override
+  public final String getId() {
+    return "DOUBLE_PUNCTUATION";
+  }
+
+  @Override
+  public final String getDescription() {
+    return messages.getString("desc_double_punct");
+  }
+
+  /**
+   * Counts consecutive "." and "," tokens. When exactly two of the same kind
+   * are seen and the next token is not a third one, a match spanning both
+   * characters is reported, with the single character as suggested
+   * replacement.
+   *
+   * @param text a pre-analyzed sentence
+   * @return the matches found, or an empty array
+   */
+  @Override
+  public final RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokens();
+    // start position of the most recent "." or "," token
+    int startPos = 0;
+    int dotCount = 0;
+    int commaCount = 0;
+    for (int i = 0; i < tokens.length; i++) {
+      final String token = tokens[i].getToken();
+      String nextToken = null;
+      if (i < tokens.length - 1) {
+        nextToken = tokens[i + 1].getToken();
+      }
+      if (".".equals(token)) {
+        dotCount++;
+        commaCount = 0;
+        startPos = tokens[i].getStartPos();
+      } else if (",".equals(token)) {
+        commaCount++;
+        dotCount = 0;
+        startPos = tokens[i].getStartPos();
+      }
+      if (dotCount == 2 && !".".equals(nextToken)) {
+        final String msg = messages.getString("two_dots");
+        // startPos points at the second dot, so step back one character
+        // to cover both of them
+        final int fromPos = Math.max(0, startPos - 1);
+        final RuleMatch ruleMatch = new RuleMatch(this, fromPos, startPos + 1,
+            msg, messages.getString("double_dots_short"));
+        ruleMatch.setSuggestedReplacement(".");
+        ruleMatches.add(ruleMatch);
+        dotCount = 0;
+      } else if (commaCount == 2 && !",".equals(nextToken)) {
+        final String msg = messages.getString("two_commas");
+        // cover both commas, as in the dot case; a match on the second comma
+        // alone would make the suggested replacement "," a no-op
+        final int fromPos = Math.max(0, startPos - 1);
+        final RuleMatch ruleMatch = new RuleMatch(this, fromPos, startPos + 1,
+            msg, messages.getString("double_commas_short"));
+        ruleMatch.setSuggestedReplacement(",");
+        ruleMatches.add(ruleMatch);
+        commaCount = 0;
+      }
+      // any other token ends the current run of punctuation
+      if (!".".equals(token) && !",".equals(token)) {
+        dotCount = 0;
+        commaCount = 0;
+      }
+    }
+
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  @Override
+  public void reset() {
+    // nothing
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRule.java
new file mode 100644
index 0000000..a2cd35c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/GenericUnpairedBracketsRule.java
@@ -0,0 +1,314 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tools.UnsyncStack;
+import de.danielnaber.languagetool.tools.SymbolLocator;
+
+/**
+ * Rule that finds unpaired quotes, brackets etc.
+ * The set of symbol pairs that is checked is selected in the constructor
+ * from the language passed in. State is kept between calls of
+ * {@link #match} until a token reports the end of a paragraph.
+ *
+ * @author Marcin Miłkowski
+ */
+public class GenericUnpairedBracketsRule extends Rule {
+
+ /**
+ * Default symbol pairs, used for every language without its own table below.
+ * Note that there must be equal length of both arrays, and the sequence of
+ * starting symbols must match exactly the sequence of ending symbols.
+ */
+ private static final String[] START_SYMBOLS = { "[", "(", "{", "\"", "'" };
+ private static final String[] END_SYMBOLS = { "]", ")", "}", "\"", "'" };
+
+ // The tables actually used by this instance; assigned in the constructor.
+ protected String[] startSymbols;
+ protected String[] endSymbols;
+
+ // Language-specific tables. Index i of a *_START_SYMBOLS array pairs with
+ // index i of the matching *_END_SYMBOLS array.
+ private static final String[] SL_START_SYMBOLS = { "[", "(", "{", "„", "»", "\"" };
+ private static final String[] SL_END_SYMBOLS = { "]", ")", "}", "”", "«", "\"" };
+
+ private static final String[] SK_START_SYMBOLS = { "[", "(", "{", "„", "»", "\"" };
+ private static final String[] SK_END_SYMBOLS = { "]", ")", "}", "“", "«", "\"" };
+
+ private static final String[] RO_START_SYMBOLS = { "[", "(", "{", "„", "«" };
+ private static final String[] RO_END_SYMBOLS = { "]", ")", "}", "”", "»" };
+
+ private static final String[] FR_START_SYMBOLS = { "[", "(", "{", "«", /*"‘"*/ };
+ private static final String[] FR_END_SYMBOLS = { "]", ")", "}", "»", /*"’" used in "d’arm" and many other words */ };
+
+ private static final String[] DE_START_SYMBOLS = { "[", "(", "{", "„", "»", "‘" };
+ private static final String[] DE_END_SYMBOLS = { "]", ")", "}", "“", "«", "’" };
+
+ private static final String[] GL_START_SYMBOLS = { "[", "(", "{", "“", "«", "‘", "\"", "'" };
+ private static final String[] GL_END_SYMBOLS = { "]", ")", "}", "”", "»", "’", "\"", "'" };
+
+ private static final String[] ES_START_SYMBOLS = { "[", "(", "{", "“", "«", "¿", "¡" };
+ private static final String[] ES_END_SYMBOLS = { "]", ")", "}", "”", "»", "?", "!" };
+
+ private static final String[] UK_START_SYMBOLS = { "[", "(", "{", "„", "«" };
+ private static final String[] UK_END_SYMBOLS = { "]", ")", "}", "“", "»" };
+
+ // NOTE(review): the literal at index 3 looks like the same character as
+ // \u201c at index 5 (and likewise for the end symbols), and \u201d closes
+ // both index 5 and index 6. Since match() tries every index for each
+ // token, one token can be handled several times here - confirm whether
+ // this is intended.
+ private static final String[] NL_START_SYMBOLS = { "[", "(", "{", "“", "\u2039", "\u201c", "\u201e" };
+ private static final String[] NL_END_SYMBOLS = { "]", ")", "}", "”", "\u203a", "\u201d", "\u201d" };
+
+ private static final String[] IT_START_SYMBOLS = { "[", "(", "{", "»", /*"‘"*/ };
+ private static final String[] IT_END_SYMBOLS = { "]", ")", "}", "«", /*"’"*/ };
+
+ private static final String[] DK_START_SYMBOLS = { "[", "(", "{", "\"", "”" };
+ private static final String[] DK_END_SYMBOLS = { "]", ")", "}", "\"", "”" };
+
+
+
+ /**
+ * The stack for pairing symbols. It is filled while one sentence is
+ * scanned and cleared at the end of every {@link #match} call.
+ */
+ protected final UnsyncStack<SymbolLocator> symbolStack = new UnsyncStack<SymbolLocator>();
+
+ /**
+ * Stack of rule matches. Unlike the symbol stack it survives between
+ * {@link #match} calls and is only cleared by {@link #reset}.
+ */
+ private final UnsyncStack<RuleMatchLocator> ruleMatchStack = new UnsyncStack<RuleMatchLocator>();
+
+ // Set when the last token of a sentence reports the end of a paragraph;
+ // makes the next match() call start with reset().
+ private boolean endOfParagraph;
+
+ // NOTE(review): assigned in the constructor but not read anywhere in this
+ // class - confirm whether it is still needed.
+ private final Language ruleLang;
+
+ private static final Pattern PUNCTUATION = Pattern.compile("\\p{Punct}");
+ private static final Pattern PUNCTUATION_NO_DOT = Pattern
+ .compile("[\\p{Punct}&&[^\\.]]");
+ // One or two digits optionally followed by letters/apostrophes, or a Roman
+ // numeral (case-insensitive). Used to skip a ")" that follows such a token,
+ // presumably an enumeration marker like "1)" or "iv)".
+ private static final Pattern NUMERALS = Pattern
+ .compile("(?i)\\d{1,2}?[a-z']*|M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])$");
+
+ // Running index handed to new RuleMatchLocator objects; initialised from
+ // getMatchesIndex() at the start of every match() call.
+ private int ruleMatchIndex;
+ // Matches of the sentence currently being checked.
+ private List<RuleMatch> ruleMatches;
+
+ /**
+ * Creates the rule and selects the symbol tables for the given language.
+ * Languages without their own table use the default one.
+ *
+ * @param messages
+ * bundle the rule's texts are read from
+ * @param language
+ * language whose symbol pairs should be checked
+ */
+ public GenericUnpairedBracketsRule(final ResourceBundle messages,
+ final Language language) {
+ super(messages);
+ super.setCategory(new Category(messages.getString("category_misc")));
+
+ setParagraphBackTrack(true);
+ if (language.equals(Language.SLOVAK)) {
+ startSymbols = SK_START_SYMBOLS;
+ endSymbols = SK_END_SYMBOLS; }
+ else if (language.equals(Language.SLOVENIAN)) {
+ startSymbols = SL_START_SYMBOLS;
+ endSymbols = SL_END_SYMBOLS;
+ } else if (language.equals(Language.FRENCH)) {
+ startSymbols = FR_START_SYMBOLS;
+ endSymbols = FR_END_SYMBOLS;
+ } else if (language.equals(Language.GERMAN)) {
+ startSymbols = DE_START_SYMBOLS;
+ endSymbols = DE_END_SYMBOLS;
+ } else if (language.equals(Language.GALICIAN)) {
+ startSymbols = GL_START_SYMBOLS;
+ endSymbols = GL_END_SYMBOLS;
+ } else if (language.equals(Language.DUTCH)) {
+ startSymbols = NL_START_SYMBOLS;
+ endSymbols = NL_END_SYMBOLS;
+ } else if (language.equals(Language.SPANISH)) {
+ startSymbols = ES_START_SYMBOLS;
+ endSymbols = ES_END_SYMBOLS;
+ } else if (language.equals(Language.UKRAINIAN)) {
+ startSymbols = UK_START_SYMBOLS;
+ endSymbols = UK_END_SYMBOLS;
+ } else if (language.equals(Language.ITALIAN)) {
+ startSymbols = IT_START_SYMBOLS;
+ endSymbols = IT_END_SYMBOLS;
+ } else if (language.equals(Language.ROMANIAN)) {
+ startSymbols = RO_START_SYMBOLS;
+ endSymbols = RO_END_SYMBOLS;
+ } else if (language.equals(Language.DANISH)) {
+ startSymbols = DK_START_SYMBOLS;
+ endSymbols = DK_END_SYMBOLS;
+ } else {
+ startSymbols = START_SYMBOLS;
+ endSymbols = END_SYMBOLS;
+ }
+
+ ruleLang = language;
+ }
+
+ public String getId() {
+ return "UNPAIRED_BRACKETS";
+ }
+
+ public String getDescription() {
+ return messages.getString("desc_unpaired_brackets");
+ }
+
+ /**
+ * Generic method to specify an exception. This default implementation
+ * ignores its arguments and simply returns true, which means no exception.
+ * @param token
+ * String token
+ * @param tokens
+ * Sentence tokens
+ * @param i
+ * Current token index
+ * @param j
+ * Index of the symbol pair in the start/end symbol arrays
+ * @param precSpace
+ * boolean: is preceded with space
+ * @param follSpace
+ * boolean: is followed with space
+ * @return true if the token should be treated as a regular symbol,
+ * false if it should be ignored
+ */
+ protected boolean isNoException(final String token,
+ final AnalyzedTokenReadings[] tokens, final int i, final int j,
+ final boolean precSpace,
+ final boolean follSpace) {
+ return true;
+ }
+
+ /**
+ * Checks one sentence for symbols without a partner.
+ *
+ * @param text
+ * a pre-analyzed sentence
+ * @return the matches created for this sentence
+ */
+ public final RuleMatch[] match(final AnalyzedSentence text) {
+ ruleMatches = new ArrayList<RuleMatch>();
+ final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+ // the previous sentence closed a paragraph: start with a clean state
+ if (endOfParagraph) {
+ reset();
+ }
+
+ ruleMatchIndex = getMatchesIndex();
+
+ // index 0 is skipped - presumably the sentence start token
+ for (int i = 1; i < tokens.length; i++) {
+ for (int j = 0; j < startSymbols.length; j++) {
+
+ final String token = tokens[i].getToken();
+ if (token.equals(startSymbols[j]) || token.equals(endSymbols[j])) {
+ // Only pairs whose start and end symbol are identical (e.g. ")
+ // need the surrounding whitespace/punctuation to tell an opening
+ // from a closing symbol; for all other pairs both flags stay true.
+ boolean precededByWhitespace = true;
+ if (startSymbols[j].equals(endSymbols[j])) {
+ precededByWhitespace = tokens[i - 1].isSentStart()
+ || tokens[i].isWhitespaceBefore()
+ || PUNCTUATION_NO_DOT.matcher(tokens[i - 1].getToken())
+ .matches();
+ }
+
+ boolean followedByWhitespace = true;
+ if (i < tokens.length - 1 && startSymbols[j].equals(endSymbols[j])) {
+ followedByWhitespace = tokens[i + 1].isWhitespaceBefore()
+ || PUNCTUATION.matcher(tokens[i + 1].getToken()).matches();
+ }
+
+ final boolean noException = isNoException(token, tokens, i, j,
+ precededByWhitespace, followedByWhitespace);
+
+ if (noException && precededByWhitespace
+ && token.equals(startSymbols[j])) {
+ symbolStack.push(new SymbolLocator(startSymbols[j], i));
+ } else if (noException && followedByWhitespace
+ && token.equals(endSymbols[j])) {
+ // A ")" after a token matching NUMERALS is ignored, unless an
+ // "(" is waiting on top of the stack. The empty branch is
+ // deliberate: nothing is pushed or popped in that case.
+ if (i > 1 && endSymbols[j].equals(")")
+ && (NUMERALS.matcher(tokens[i - 1].getToken()).matches()
+ && !(!symbolStack.empty()
+ && "(".equals(symbolStack.peek().symbol)))) {
+ } else {
+ if (symbolStack.empty()) {
+ // closing symbol without any opening one
+ symbolStack.push(new SymbolLocator(endSymbols[j], i));
+ } else {
+ if (symbolStack.peek().symbol.equals(startSymbols[j])) {
+ // matching pair found
+ symbolStack.pop();
+ } else {
+ symbolStack.push(new SymbolLocator(endSymbols[j], i));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ // whatever is still on the stack had no partner in this sentence
+ for (final SymbolLocator sLoc : symbolStack) {
+ final RuleMatch rMatch = createMatch(tokens[sLoc.index].getStartPos(),
+ sLoc.symbol);
+ if (rMatch != null) {
+ ruleMatches.add(rMatch);
+ }
+ }
+ symbolStack.clear();
+ if (tokens[tokens.length - 1].isParaEnd()) {
+ endOfParagraph = true;
+ }
+
+ return toRuleMatchArray(ruleMatches);
+ }
+
+ /**
+ * Creates the match for an unpaired symbol, unless the symbol closes a
+ * start symbol that is on top of the rule match stack. In that case the
+ * earlier match is removed from the current sentence's list, or marked as
+ * deleted in the list kept by {@link Rule}, and null is returned.
+ *
+ * @param startPos
+ * start position of the symbol
+ * @param symbol
+ * the unpaired symbol
+ * @return the new match, or null if the symbol cancelled an earlier match
+ */
+ private RuleMatch createMatch(final int startPos, final String symbol) {
+ if (!ruleMatchStack.empty()) {
+ // index >= 0 only if the symbol is one of the end symbols
+ final int index = findSymbolNum(symbol);
+ if (index >= 0) {
+ final RuleMatchLocator rLoc = ruleMatchStack.peek();
+ if (rLoc.symbol.equals(startSymbols[index])) {
+ // the opening symbol was reported in the current sentence
+ if (ruleMatches.size() > rLoc.myIndex) {
+ ruleMatches.remove(rLoc.myIndex);
+ ruleMatchStack.pop();
+ return null;
+ // if (ruleMatches.get(rLoc.myIndex).getFromPos())
+ }
+ // the opening symbol was reported earlier and is known to Rule
+ if (isInMatches(rLoc.index)) {
+ setAsDeleted(rLoc.index);
+ ruleMatchStack.pop();
+ return null;
+ }
+ }
+ }
+ }
+ ruleMatchStack.push(new RuleMatchLocator(symbol, ruleMatchIndex,
+ ruleMatches.size()));
+ ruleMatchIndex++;
+ return new RuleMatch(this, startPos, startPos + symbol.length(), messages
+ .getString("unpaired_brackets"));
+ }
+
+ /**
+ * Returns the index of the first entry of the end symbol array that equals
+ * the given string, or -1 if there is none.
+ */
+ private int findSymbolNum(final String ch) {
+ for (int i = 0; i < endSymbols.length; i++) {
+ if (ch.equals(endSymbols[i])) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Reset the state information for the rule, including paragraph-level
+ * information. The matches stored in {@link Rule} are only cleared when
+ * the reset was not triggered by the end of a paragraph.
+ */
+ public final void reset() {
+ ruleMatchStack.clear();
+ symbolStack.clear();
+ if (!endOfParagraph) {
+ clearMatches();
+ }
+ endOfParagraph = false;
+ }
+
+}
+
+/**
+ * A {@link SymbolLocator} that carries one additional index next to the
+ * symbol and index stored by its superclass.
+ */
+class RuleMatchLocator extends SymbolLocator {
+
+  /** The additional index passed as third constructor argument. */
+  public int myIndex;
+
+  /**
+   * @param sym symbol, handed on to {@link SymbolLocator}
+   * @param ind index, handed on to {@link SymbolLocator}
+   * @param myInd value stored in {@link #myIndex}
+   */
+  RuleMatchLocator(final String sym, final int ind, final int myInd) {
+    super(sym, ind);
+    this.myIndex = myInd;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/IncorrectExample.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/IncorrectExample.java
new file mode 100644
index 0000000..0d3478f
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/IncorrectExample.java
@@ -0,0 +1,62 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2008 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * A text, typically a sentence, that contains an error.
+ *
+ * @since 0.9.2
+ * @author Daniel Naber
+ */
+public class IncorrectExample {
+
+  private final String example;
+  // null if the example was created without corrections
+  private final List<String> corrections;
+
+  /**
+   * Create an example without corrections.
+   * @param example the text that contains the error
+   */
+  public IncorrectExample(final String example) {
+    this.example = example;
+    this.corrections = null;
+  }
+
+  /**
+   * Create an example with its possible corrections.
+   * @param example the text that contains the error
+   * @param corrections the possible corrections, must not be null
+   * @throws NullPointerException if corrections is null
+   */
+  public IncorrectExample(final String example, final String[] corrections) {
+    this.example = example;
+    // Arrays.asList() is only a view on the array, so copy it first:
+    // later changes to the caller's array must not alter this example
+    this.corrections = Arrays.asList(corrections.clone());
+  }
+
+  /**
+   * Return the example that contains the error.
+   */
+  public String getExample() {
+    return example;
+  }
+
+  /**
+   * Return the possible corrections. May be null.
+   */
+  public List<String> getCorrections() {
+    return corrections;
+  }
+
+  /**
+   * @return the example text, a space and the list of corrections
+   */
+  @Override
+  public String toString() {
+    return example + " " + corrections;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/Rule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/Rule.java
new file mode 100644
index 0000000..210754c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/Rule.java
@@ -0,0 +1,230 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Abstract rule class. A Rule describes a language error and can test whether a
+ * given pre-analyzed text contains that error using the {@link Rule#match}
+ * method.
+ *
+ * @author Daniel Naber
+ */
+public abstract class Rule {
+
+  protected ResourceBundle messages;
+
+  private List<String> correctExamples;
+  private List<IncorrectExample> incorrectExamples;
+  private Category category;
+
+  /**
+   * If true, then the rule is turned off by default.
+   */
+  private boolean defaultOff;
+
+  /**
+   * Used by paragraph rules to signal that they can remove previous rule
+   * matches.
+   */
+  private boolean paragraphBackTrack;
+
+  /**
+   * The final list of RuleMatches, without removed matches. Created lazily
+   * by {@link #addRuleMatch}, so it is null until the first match is added.
+   */
+  private List<RuleMatch> previousMatches;
+
+  /**
+   * Matches marked by {@link #setAsDeleted}. Created lazily.
+   */
+  private List<RuleMatch> removedMatches;
+
+  /**
+   * Called by language-dependent rules.
+   */
+  public Rule() {
+  }
+
+  /**
+   * Called by language-independent rules.
+   */
+  public Rule(final ResourceBundle messages) {
+    this.messages = messages;
+  }
+
+  public abstract String getId();
+
+  public abstract String getDescription();
+
+  /**
+   * Check whether the given text matches this error rule, i.e. whether the text
+   * contains this error.
+   *
+   * @param text
+   *          a pre-analyzed sentence
+   * @return an array of RuleMatch object for each match.
+   */
+  public abstract RuleMatch[] match(AnalyzedSentence text) throws IOException;
+
+  /**
+   * If a rule keeps its state over more than the check of one sentence, this
+   * must be implemented so the internal state is reset. It will be called
+   * before a new text is going to be checked.
+   */
+  public abstract void reset();
+
+  /**
+   * Whether this rule can be used for text in the given language.
+   */
+  public final boolean supportsLanguage(final Language language) {
+    final Set<String> relevantIDs = language.getRelevantRuleIDs();
+    return relevantIDs != null && relevantIDs.contains(getId());
+  }
+
+  /**
+   * Set the examples that are correct and thus do not trigger the rule.
+   */
+  public final void setCorrectExamples(final List<String> correctExamples) {
+    this.correctExamples = correctExamples;
+  }
+
+  /**
+   * Get example sentences that are correct and thus will not match this rule.
+   */
+  public final List<String> getCorrectExamples() {
+    return correctExamples;
+  }
+
+  /**
+   * Set the examples that are incorrect and thus do trigger the rule.
+   */
+  public final void setIncorrectExamples(
+      final List<IncorrectExample> incorrectExamples) {
+    this.incorrectExamples = incorrectExamples;
+  }
+
+  /**
+   * Get example sentences that are incorrect and thus will match this rule.
+   */
+  public final List<IncorrectExample> getIncorrectExamples() {
+    return incorrectExamples;
+  }
+
+  public final Category getCategory() {
+    return category;
+  }
+
+  public final void setCategory(final Category category) {
+    this.category = category;
+  }
+
+  /**
+   * Converts the given list to an array of the same size.
+   */
+  protected final RuleMatch[] toRuleMatchArray(final List<RuleMatch> ruleMatches) {
+    return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]);
+  }
+
+  public final boolean isParagraphBackTrack() {
+    return paragraphBackTrack;
+  }
+
+  public final void setParagraphBackTrack(final boolean backTrack) {
+    paragraphBackTrack = backTrack;
+  }
+
+  /**
+   * Method to add matches.
+   *
+   * @param r
+   *          RuleMatch - matched rule added by check()
+   */
+  public final void addRuleMatch(final RuleMatch r) {
+    if (previousMatches == null) {
+      previousMatches = new ArrayList<RuleMatch>();
+    }
+    previousMatches.add(r);
+  }
+
+  /**
+   * Deletes (or disables) previously matched rule.
+   *
+   * @param i
+   *          Index of the rule that should be deleted.
+   * @throws IndexOutOfBoundsException
+   *           if no match has been added at that index
+   */
+  public final void setAsDeleted(final int i) {
+    // fail with a descriptive exception instead of a bare
+    // NullPointerException when no match has been added yet
+    if (previousMatches == null || i < 0 || i >= previousMatches.size()) {
+      throw new IndexOutOfBoundsException("No rule match at index " + i);
+    }
+    if (removedMatches == null) {
+      removedMatches = new ArrayList<RuleMatch>();
+    }
+    removedMatches.add(previousMatches.get(i));
+  }
+
+  /**
+   * @return true if the given match has been passed to
+   *         {@link #setAsDeleted} (by index) before
+   */
+  public final boolean isInRemoved(final RuleMatch r) {
+    if (removedMatches == null) {
+      return false;
+    }
+    return removedMatches.contains(r);
+  }
+
+  /**
+   * @return true if a non-null match is stored at the given index; false
+   *         if the index is out of range or nothing has been added yet
+   */
+  public final boolean isInMatches(final int i) {
+    if (previousMatches == null || i < 0) {
+      return false;
+    }
+    if (previousMatches.size() > i) {
+      return previousMatches.get(i) != null;
+    }
+    return false;
+  }
+
+  /**
+   * Removes all matches added by {@link #addRuleMatch}. The matches marked
+   * as deleted are not touched.
+   */
+  public final void clearMatches() {
+    if (previousMatches != null) {
+      previousMatches.clear();
+    }
+  }
+
+  /**
+   * @return the number of matches added so far, i.e. the index the next
+   *         added match will get
+   */
+  public final int getMatchesIndex() {
+    if (previousMatches == null) {
+      return 0;
+    }
+    return previousMatches.size();
+  }
+
+  /**
+   * @return the internal list of added matches; null if no match has been
+   *         added yet
+   */
+  public final List<RuleMatch> getMatches() {
+    return previousMatches;
+  }
+
+  /**
+   * Checks whether the rule has been turned off by default by the rule author.
+   *
+   * @return True if the rule is turned off by default.
+   */
+  public final boolean isDefaultOff() {
+    return defaultOff;
+  }
+
+  /**
+   * Turns the rule off by default.
+   */
+  public final void setDefaultOff() {
+    defaultOff = true;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/RuleMatch.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/RuleMatch.java
new file mode 100644
index 0000000..05746fb
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/RuleMatch.java
@@ -0,0 +1,239 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A class that holds information about where a rule matches text.
+ *
+ * @author Daniel Naber
+ */
+public class RuleMatch implements Comparable<RuleMatch> {
+
+  private static final Pattern SUGGESTION_PATTERN = Pattern.compile("<suggestion>(.*?)</suggestion>");
+
+  // line/column/offset information stays at -1 until set explicitly
+  private int fromLine = -1;
+  private int column = -1;
+  private int offset = -1;
+  private int endLine = -1;
+  private int endColumn = -1;
+
+  private Rule rule;
+  private int fromPos;
+  private int toPos;
+  private String message;
+  // for OOo context menu
+  private String shortMessage;
+
+  private List<String> suggestedReplacements = new ArrayList<String>();
+
+  // TODO: remove this one after all rules get their short comments in place
+  public RuleMatch(Rule rule, int fromPos, int toPos, String message) {
+    this(rule, fromPos, toPos, message, null, false);
+  }
+
+  // TODO: remove this constructor?
+  public RuleMatch(Rule rule, int fromPos, int toPos, String message, String shortMessage) {
+    this(rule, fromPos, toPos, message, shortMessage, false);
+  }
+
+  /**
+   * Creates a RuleMatch object, taking the rule that triggered
+   * this match, position of the match and an explanation message.
+   * This message is scanned for &lt;suggestion>...&lt;/suggestion> to get suggested
+   * fixes for the problem detected by this rule.
+   *
+   * @param rule the rule that triggered this match
+   * @param fromPos start position of the match (in characters)
+   * @param toPos end position of the match (in characters)
+   * @param message explanation of the error, must not be null
+   * @param shortMessage shorter explanation, may be null
+   * @param startWithUppercase whether the original text at the position
+   *    of the match start with an uppercase character
+   */
+  public RuleMatch(Rule rule, int fromPos, int toPos, String message, String shortMessage,
+      boolean startWithUppercase) {
+    this.rule = rule;
+    this.fromPos = fromPos;
+    this.toPos = toPos;
+    this.message = message;
+    this.shortMessage = shortMessage;
+    // extract suggestion from <suggestion>...</suggestion> in message:
+    final Matcher matcher = SUGGESTION_PATTERN.matcher(message);
+    int pos = 0;
+    while (matcher.find(pos)) {
+      pos = matcher.end();
+      String repl = matcher.group(1);
+      if (startWithUppercase) {
+        repl = StringTools.uppercaseFirstChar(repl);
+      }
+      suggestedReplacements.add(repl);
+    }
+  }
+
+  public Rule getRule() {
+    return rule;
+  }
+
+  /**
+   * Set the line number in which the match occurs.
+   */
+  public void setLine(final int fromLine) {
+    this.fromLine = fromLine;
+  }
+
+  /**
+   * Get the line number in which the match occurs.
+   */
+  public int getLine() {
+    return fromLine;
+  }
+
+  /**
+   * Set the line number in which the match ends.
+   */
+  public void setEndLine(final int endLine) {
+    this.endLine = endLine;
+  }
+
+  /**
+   * Get the line number in which the match ends.
+   */
+  public int getEndLine() {
+    return endLine;
+  }
+
+  /**
+   * Set the column number in which the match occurs.
+   */
+  public void setColumn(final int column) {
+    this.column = column;
+  }
+
+  /**
+   * Get the column number in which the match occurs.
+   */
+  public int getColumn() {
+    return column;
+  }
+
+  /**
+   * Set the column number in which the match ends.
+   */
+  public void setEndColumn(final int endColumn) {
+    this.endColumn = endColumn;
+  }
+
+  /**
+   * Get the column number in which the match ends.
+   */
+  public int getEndColumn() {
+    return endColumn;
+  }
+
+  /**
+   * Set the character offset at which the match occurs.
+   */
+  public void setOffset(final int offset) {
+    this.offset = offset;
+  }
+
+  /**
+   * Get the character offset at which the match occurs.
+   */
+  public int getOffset() {
+    return offset;
+  }
+
+  /**
+   * Position of the start of the error (in characters).
+   */
+  public int getFromPos() {
+    return fromPos;
+  }
+
+  /**
+   * Position of the end of the error (in characters).
+   */
+  public int getToPos() {
+    return toPos;
+  }
+
+  /**
+   * A human-readable explanation describing the error.
+   */
+  public String getMessage() {
+    return message;
+  }
+
+  /**
+   * A shorter human-readable explanation describing the error.
+   */
+  public String getShortMessage() {
+    return shortMessage;
+  }
+
+  /**
+   * Replaces all suggestions with the single given one.
+   *
+   * @throws NullPointerException if replacement is null
+   * @see #getSuggestedReplacements()
+   */
+  public void setSuggestedReplacement(final String replacement) {
+    if (replacement == null) {
+      throw new NullPointerException("replacement might be empty but not null");
+    }
+    final List<String> fixes = new ArrayList<String>();
+    fixes.add(replacement);
+    setSuggestedReplacements(fixes);
+  }
+
+  /**
+   * Replaces all suggestions with the given list. The list is stored as
+   * is, not copied.
+   *
+   * @throws NullPointerException if replacement is null
+   * @see #getSuggestedReplacements()
+   */
+  public void setSuggestedReplacements(final List<String> replacement) {
+    if (replacement == null) {
+      throw new NullPointerException("replacement might be empty but not null");
+    }
+    this.suggestedReplacements = replacement;
+  }
+
+  /**
+   * The text fragments which might be an appropriate fix for the problem. One
+   * of these fragments can be used to replace the old text between getFromPos()
+   * to getToPos(). Text between &lt;suggestion> and &lt;/suggestion> is
+   * taken as the suggested replacement.
+   * @return List of String objects or an empty List
+   */
+  public List<String> getSuggestedReplacements() {
+    return suggestedReplacements;
+  }
+
+  @Override
+  public String toString() {
+    return rule.getId() + ":" + fromPos + "-" + toPos + ":" + message;
+  }
+
+  /**
+   * Orders matches by their start position only.
+   *
+   * @throws NullPointerException if other is null, as required by the
+   *    {@link Comparable} contract
+   */
+  @Override
+  public int compareTo(final RuleMatch other) {
+    if (other == null) {
+      throw new NullPointerException("other must not be null");
+    }
+    if (getFromPos() < other.getFromPos()) {
+      return -1;
+    }
+    if (getFromPos() > other.getFromPos()) {
+      return 1;
+    }
+    return 0;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java
new file mode 100644
index 0000000..35ecfa4
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/UppercaseSentenceStartRule.java
@@ -0,0 +1,136 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Checks that a sentence starts with an uppercase letter.
+ *
+ * <p>A leading quote character (', " or „) is skipped before the check. For
+ * Dutch, a sentence that starts with an apostrophe followed by one of the
+ * tokens k, m, n, r, s or t is checked at the token after that contraction.
+ * The last token of the previously checked sentence is remembered so that,
+ * for some languages, text following a comma or semicolon is not flagged.</p>
+ *
+ * @author Daniel Naber
+ */
+public class UppercaseSentenceStartRule extends Rule {
+
+  private final Language language;
+
+  /** Last token of the sentence handled by the previous call to match(). */
+  private String lastParagraphString = "";
+
+  /**
+   * @param messages bundle providing the category name, description and
+   *        error message of this rule
+   * @param language the language of the checked text; selects the
+   *        language-specific exceptions
+   */
+  public UppercaseSentenceStartRule(final ResourceBundle messages,
+      final Language language) {
+    super(messages);
+    super.setCategory(new Category(messages.getString("category_case")));
+    this.language = language;
+  }
+
+  @Override
+  public final String getId() {
+    return "UPPERCASE_SENTENCE_START";
+  }
+
+  @Override
+  public final String getDescription() {
+    return messages.getString("desc_uppercase_sentence");
+  }
+
+  /**
+   * Reports the first relevant token of the sentence if it starts with a
+   * lowercase letter, suggesting the same token with an uppercase first
+   * letter.
+   *
+   * @param text the sentence to check
+   * @return an array with zero or one match
+   */
+  @Override
+  public final RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    if (tokens.length < 2) {
+      return toRuleMatchArray(ruleMatches);
+    }
+    int matchTokenPos = 1; // 0 = SENT_START
+    final String firstToken = tokens[matchTokenPos].getToken();
+    String secondToken = null;
+    String thirdToken = null;
+    // ignore quote characters:
+    if (tokens.length >= 3
+        && ("'".equals(firstToken) || "\"".equals(firstToken) || "„"
+            .equals(firstToken))) {
+      matchTokenPos = 2;
+      secondToken = tokens[matchTokenPos].getToken();
+    }
+    final String firstDutchToken = dutchSpecialCase(firstToken, secondToken,
+        tokens);
+    if (firstDutchToken != null) {
+      thirdToken = firstDutchToken;
+      matchTokenPos = 3;
+    }
+
+    // the token to test is the one found furthest into the sentence
+    String checkToken = firstToken;
+    if (thirdToken != null) {
+      checkToken = thirdToken;
+    } else if (secondToken != null) {
+      checkToken = secondToken;
+    }
+
+    final String lastToken = tokens[tokens.length - 1].getToken();
+
+    // true if a lowercase sentence start must be tolerated here
+    boolean suppressed = false;
+    //fix for lists; note - this will not always work for the last point in OOo,
+    //as OOo might serve paragraphs in any order.
+    if ((language == Language.RUSSIAN || language == Language.POLISH)
+        && (";".equals(lastParagraphString) || ";".equals(lastToken)
+            || ",".equals(lastParagraphString) || ",".equals(lastToken))) {
+      suppressed = true;
+    }
+    //fix for comma in last paragraph; note - this will not always work for the last point in OOo,
+    //as OOo might serve paragraphs in any order.
+    if ((language == Language.RUSSIAN || language == Language.ITALIAN
+        || language == Language.POLISH || language == Language.GERMAN)
+        && (",".equals(lastParagraphString))) {
+      suppressed = true;
+    }
+
+    // remember how this sentence ended for the next call
+    lastParagraphString = lastToken;
+
+    if (checkToken.length() > 0) {
+      final char firstChar = checkToken.charAt(0);
+      if (Character.isLowerCase(firstChar) && !suppressed) {
+        final int startPos = tokens[matchTokenPos].getStartPos();
+        final RuleMatch ruleMatch = new RuleMatch(this, startPos,
+            startPos + tokens[matchTokenPos].getToken().length(),
+            messages.getString("incorrect_case"));
+        ruleMatch.setSuggestedReplacement(Character.toUpperCase(firstChar)
+            + checkToken.substring(1));
+        ruleMatches.add(ruleMatch);
+      }
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Handles Dutch sentences that start with an apostrophe followed by one of
+   * the tokens k, m, n, r, s or t: the uppercase check then applies to the
+   * token after the contraction.
+   *
+   * @param firstToken the first token after SENT_START
+   * @param secondToken the token after a leading quote, or null if the
+   *        sentence does not start with a quote
+   * @param tokens all non-whitespace tokens of the sentence
+   * @return the token at index 3 if the special case applies, else null
+   */
+  private String dutchSpecialCase(final String firstToken,
+      final String secondToken, final AnalyzedTokenReadings[] tokens) {
+    if (language != Language.DUTCH) {
+      return null;
+    }
+    // tokens[3] is read below, so at least four tokens are required;
+    // a sentence consisting only of the contraction must not fail here
+    if (tokens.length > 3 && secondToken != null && "'".equals(firstToken)
+        && secondToken.matches("k|m|n|r|s|t")) {
+      return tokens[3].getToken();
+    }
+    return null;
+  }
+
+  @Override
+  public void reset() {
+    // NOTE(review): lastParagraphString is not cleared here; confirm that
+    // carrying it over between checks is intended.
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/WhitespaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/WhitespaceRule.java
new file mode 100644
index 0000000..61f1ca6
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/WhitespaceRule.java
@@ -0,0 +1,91 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Check if there is duplicated whitespace in a sentence.
+ * Considers two or more consecutive whitespace tokens as incorrect
+ * (unless the first of them is a linebreak), and proposes a single
+ * space instead.
+ *
+ * @author Marcin Miłkowski
+ */
+
+public class WhitespaceRule extends Rule {
+
+ /**
+ * @param messages bundle providing the category name, description and
+ * error message of this rule
+ * @param language not used by this constructor
+ */
+ public WhitespaceRule(final ResourceBundle messages, final Language language) {
+ super(messages);
+ super.setCategory(new Category(messages.getString("category_misc")));
+ }
+
+ @Override
+ public final String getId() {
+ return "WHITESPACE_RULE";
+ }
+
+ @Override
+ public final String getDescription() {
+ return messages.getString("desc_whitespacerepetition");
+ }
+
+ /**
+ * Scans all tokens (including whitespace tokens) and creates one match for
+ * every run of consecutive whitespace tokens, suggesting a single space.
+ *
+ * @param text the sentence to check
+ * @return the matches found, possibly an empty array
+ */
+ @Override
+ public RuleMatch[] match(final AnalyzedSentence text) {
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ final AnalyzedTokenReadings[] tokens = text.getTokens();
+ // prevWhite/prevLen/prevPos describe the token handled in the previous
+ // iteration: whether it was whitespace, its length and its start position
+ boolean prevWhite = false;
+ int prevLen = 0;
+ int prevPos = 0;
+ //note: we start from token 1
+ //token no. 0 is guaranteed to be SENT_START
+ int i = 1;
+ while (i < tokens.length) {
+ // second whitespace token in a row, and the previous one is no linebreak
+ if (tokens[i].isWhitespace() && prevWhite && !tokens[i -1].isLinebreak()) {
+ final int pos = tokens[i -1].getStartPos();
+ // consume the rest of the whitespace run, adding its length to the
+ // length of the first whitespace token already held in prevLen
+ while (i < tokens.length && tokens[i].isWhitespace()) {
+ prevLen += tokens[i].getToken().length();
+ i++;
+ }
+ // the match covers the run from its first token to its end
+ final RuleMatch ruleMatch = new RuleMatch(this, prevPos, pos + prevLen, messages
+ .getString("whitespace_repetition"));
+ ruleMatch.setSuggestedReplacement(" ");
+ ruleMatches.add(ruleMatch);
+ }
+ // i may have reached the end inside the inner loop above
+ if (i < tokens.length) {
+ prevWhite = tokens[i].isWhitespace();
+ prevLen = tokens[i].getToken().length();
+ prevPos = tokens[i].getStartPos();
+ i++;
+ }
+ }
+ return toRuleMatchArray(ruleMatches);
+ }
+
+ @Override
+ public void reset() {
+ // nothing
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/WordRepeatRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/WordRepeatRule.java
new file mode 100644
index 0000000..c8060a5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/WordRepeatRule.java
@@ -0,0 +1,101 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Detects a word that is immediately repeated, e.g. "the the". The
+ * comparison ignores case; tokens consisting of one non-letter character
+ * are not treated as words.
+ *
+ * @author Daniel Naber
+ */
+public class WordRepeatRule extends Rule {
+
+  /**
+   * @param messages bundle providing the category name, description and
+   *        error messages of this rule
+   * @param language not used by this constructor
+   */
+  public WordRepeatRule(final ResourceBundle messages, final Language language) {
+    super(messages);
+    super.setCategory(new Category(messages.getString("category_misc")));
+  }
+
+  /**
+   * Override this method to return <code>true</code> if a potential word
+   * repetition at the current position should be ignored, i.e. if no error
+   * should be created. It is called for every token of the sentence.
+   *
+   * @param tokens the tokens of the sentence currently being checked
+   * @param position the current position in the tokens
+   * @return this implementation always returns false
+   */
+  public boolean ignore(final AnalyzedTokenReadings[] tokens, final int position) {
+    return false;
+  }
+
+  @Override
+  public String getId() {
+    return "WORD_REPEAT_RULE";
+  }
+
+  @Override
+  public String getDescription() {
+    return messages.getString("desc_repetition");
+  }
+
+  /**
+   * Compares each non-whitespace token with its predecessor and creates a
+   * match spanning both tokens when they are equal ignoring case. The
+   * suggestion is the first of the two tokens.
+   *
+   * @param text the sentence to check
+   * @return the matches found, possibly an empty array
+   */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    final List<RuleMatch> found = new ArrayList<RuleMatch>();
+    String previous = "";
+    // index 0 is always SENT_START, so the scan begins at index 1
+    for (int idx = 1; idx < tokens.length; idx++) {
+      final String current = tokens[idx].getToken();
+      // a single non-letter character ("." etc.) does not count as a word
+      final boolean isWord =
+          current.length() != 1 || Character.isLetter(current.charAt(0));
+      final boolean ignored = ignore(tokens, idx);
+      final boolean sameAsPrevious =
+          previous.toLowerCase().equals(current.toLowerCase());
+      if (isWord && sameAsPrevious && !ignored) {
+        final String msg = messages.getString("repetition");
+        final int from = tokens[idx - 1].getStartPos();
+        final int to = tokens[idx].getStartPos() + previous.length();
+        final RuleMatch repeatMatch = new RuleMatch(this, from, to, msg,
+            messages.getString("desc_repetition_short"));
+        repeatMatch.setSuggestedReplacement(previous);
+        found.add(repeatMatch);
+      }
+      previous = current;
+    }
+    return toRuleMatchArray(found);
+  }
+
+  @Override
+  public void reset() {
+    // nothing
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/BitextRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/BitextRule.java
new file mode 100644
index 0000000..d508ae5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/BitextRule.java
@@ -0,0 +1,106 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.io.IOException;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.bitext.StringPair;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Base class for bitext rules. A BitextRule describes a language error and
+ * can test whether a given pre-analyzed pair of source and target text
+ * contains that error using the two-argument <code>match</code> method.
+ *
+ * @author Marcin Miłkowski
+ */
+
+public abstract class BitextRule extends Rule {
+
+  private Language sourceLanguage;
+
+  private List<StringPair> correctExamples;
+  private List<IncorrectBitextExample> incorrectExamples;
+
+  @Override
+  public abstract String getDescription();
+
+  /**
+   * The message of this rule.
+   */
+  public abstract String getMessage();
+
+  @Override
+  public abstract String getId();
+
+  @Override
+  public abstract RuleMatch[] match(AnalyzedSentence text) throws IOException;
+
+  /**
+   * Checks a pair of source and target sentences.
+   *
+   * @param sourceText the analyzed source sentence
+   * @param targetText the analyzed target (translated) sentence
+   * @return the matches found by this rule
+   */
+  public abstract RuleMatch[] match(AnalyzedSentence sourceText,
+      AnalyzedSentence targetText) throws IOException;
+
+  @Override
+  public abstract void reset();
+
+  /**
+   * Set the source language. If the language is not supported
+   * by LT, you need to use the default tokenizers etc.
+   * @param lang - Source Language
+   */
+  public final void setSourceLang(final Language lang) {
+    this.sourceLanguage = lang;
+  }
+
+  /**
+   * Get the source language as set by {@link #setSourceLang(Language)}.
+   */
+  public final Language getSourceLang() {
+    return this.sourceLanguage;
+  }
+
+  /**
+   * Set the examples that are correct and thus do not trigger the rule.
+   */
+  public final void setCorrectBitextExamples(final List<StringPair> correctExamples) {
+    this.correctExamples = correctExamples;
+  }
+
+  /**
+   * Get example sentences that are correct and thus will not match this rule.
+   */
+  public final List<StringPair> getCorrectBitextExamples() {
+    return this.correctExamples;
+  }
+
+  /**
+   * Set the examples that are incorrect and thus do trigger the rule.
+   */
+  public final void setIncorrectBitextExamples(
+      final List<IncorrectBitextExample> incorrectExamples) {
+    this.incorrectExamples = incorrectExamples;
+  }
+
+  /**
+   * Get example sentences that are incorrect and thus will match this rule.
+   */
+  public final List<IncorrectBitextExample> getIncorrectBitextExamples() {
+    return this.incorrectExamples;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/DifferentLengthRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/DifferentLengthRule.java
new file mode 100644
index 0000000..995772c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/DifferentLengthRule.java
@@ -0,0 +1,93 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Checks if the translation has a really different length than the source:
+ * the rule fires when the source length is more than 250% or less than 30%
+ * of the target length (lengths counted in characters of all tokens).
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public class DifferentLengthRule extends BitextRule {
+
+  static final String MSG = "Source and target translation lengths are very different!";
+
+  @Override
+  public String getDescription() {
+    return "Check if translation length is similar to source length";
+  }
+
+  @Override
+  public String getId() {
+    return "TRANSLATION_LENGTH";
+  }
+
+  @Override
+  public String getMessage() {
+    return MSG;
+  }
+
+  /**
+   * Checking a single sentence makes no sense for a bitext rule, so this
+   * method never reports anything.
+   *
+   * @param text ignored
+   * @return always an empty array (never null, so callers can iterate it)
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence text) throws IOException {
+    return new RuleMatch[0];
+  }
+
+  /**
+   * Compares the character lengths of source and target sentence.
+   *
+   * @param sourceText the analyzed source sentence
+   * @param targetText the analyzed target sentence
+   * @return one match ending at the end of the last target token if the
+   *         lengths differ too much, otherwise an empty array
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence sourceText,
+      AnalyzedSentence targetText) throws IOException {
+
+    if (isLengthDifferent(
+        getPureText(sourceText), getPureText(targetText))) {
+      final AnalyzedTokenReadings[] tokens = targetText.getTokens();
+      final AnalyzedTokenReadings last = tokens[tokens.length - 1];
+      final int len = last.getStartPos() + last.getToken().length();
+      // NOTE(review): the match starts at offset 1, not 0 - confirm intended
+      return new RuleMatch[] { new RuleMatch(this, 1, len, MSG) };
+    }
+    return new RuleMatch[0];
+  }
+
+  /**
+   * @param src the source text
+   * @param trg the target text
+   * @return true if the length of <code>src</code> is more than 250% or
+   *         less than 30% of the length of <code>trg</code>; for an empty
+   *         target, true exactly if the source is not empty
+   */
+  static boolean isLengthDifferent(final String src, final String trg) {
+    if (trg.length() == 0) {
+      // same result as the floating-point division by zero would give
+      // (infinite ratio for a non-empty source, NaN for two empty texts)
+      return src.length() > 0;
+    }
+    final double skew = (((double) src.length() / (double) trg.length()) * 100.00);
+    return (skew > 250 || skew < 30);
+  }
+
+  /** Concatenates the text of all tokens of the sentence. */
+  private static String getPureText(AnalyzedSentence text) {
+    final StringBuilder sb = new StringBuilder();
+    for (AnalyzedTokenReadings token : text.getTokens()) {
+      sb.append(token.getToken());
+    }
+    return sb.toString();
+  }
+
+  @Override
+  public void reset() {
+    // nothing to reset
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/IncorrectBitextExample.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/IncorrectBitextExample.java
new file mode 100644
index 0000000..e877826
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/IncorrectBitextExample.java
@@ -0,0 +1,64 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.util.Arrays;
+import java.util.List;
+
+import de.danielnaber.languagetool.bitext.StringPair;
+
+/**
+ * A text, typically a pair of sentences, that contains an error, optionally
+ * together with possible corrections.
+ *
+ * @since 1.0.1
+ * @author Marcin Miłkowski
+ */
+public class IncorrectBitextExample {
+
+  private StringPair example;
+  private List<String> corrections;
+
+  /**
+   * Creates an example without corrections.
+   *
+   * @param example the pair that contains the error
+   */
+  public IncorrectBitextExample(final StringPair example) {
+    this.example = example;
+  }
+
+  /**
+   * Creates an example with corrections.
+   *
+   * @param example the pair that contains the error
+   * @param corrections the possible corrections
+   */
+  public IncorrectBitextExample(final StringPair example, final String[] corrections) {
+    this.example = example;
+    this.corrections = Arrays.asList(corrections);
+  }
+
+  /**
+   * Return the example that contains the error.
+   */
+  public StringPair getExample() {
+    return this.example;
+  }
+
+  /**
+   * Return the possible corrections. May be null.
+   */
+  public List<String> getCorrections() {
+    return this.corrections;
+  }
+
+  /**
+   * Renders the example as <code>source/ target corrections</code>.
+   */
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append(example.getSource());
+    sb.append("/ ");
+    sb.append(example.getTarget());
+    sb.append(" ");
+    sb.append(corrections);
+    return sb.toString();
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/SameTranslationRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/SameTranslationRule.java
new file mode 100644
index 0000000..c9e1ace
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/bitext/SameTranslationRule.java
@@ -0,0 +1,88 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.bitext;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Checks if the translation is identical to the source. Only source
+ * sentences with more than three non-whitespace tokens (the sentence start
+ * marker included) are considered.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public class SameTranslationRule extends BitextRule {
+
+  static final String MSG = "Source and target translation are the same!";
+
+  @Override
+  public String getDescription() {
+    return "Check if translation is the same as source";
+  }
+
+  @Override
+  public String getId() {
+    return "SAME_TRANSLATION";
+  }
+
+  @Override
+  public String getMessage() {
+    return MSG;
+  }
+
+  /**
+   * Checking a single sentence makes no sense for a bitext rule, so this
+   * method never reports anything.
+   *
+   * @param text ignored
+   * @return always an empty array (never null, so callers can iterate it)
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence text) throws IOException {
+    return new RuleMatch[0];
+  }
+
+  /**
+   * Compares the concatenated token text of source and target sentence.
+   *
+   * @param sourceText the analyzed source sentence
+   * @param targetText the analyzed target sentence
+   * @return one match ending at the end of the last target token if both
+   *         texts are equal, otherwise an empty array
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence sourceText,
+      AnalyzedSentence targetText) throws IOException {
+
+    //This is just heuristics, checking word count
+    if (sourceText.getTokensWithoutWhitespace().length > 3
+        && getPureText(sourceText).equals(getPureText(targetText))) {
+      final AnalyzedTokenReadings[] tokens = targetText.getTokens();
+      final AnalyzedTokenReadings last = tokens[tokens.length - 1];
+      final int len = last.getStartPos() + last.getToken().length();
+      // NOTE(review): the match starts at offset 1, not 0 - confirm intended
+      return new RuleMatch[] { new RuleMatch(this, 1, len, MSG) };
+    }
+    return new RuleMatch[0];
+  }
+
+  /** Concatenates the text of all tokens of the sentence. */
+  private static String getPureText(AnalyzedSentence text) {
+    final StringBuilder sb = new StringBuilder();
+    for (AnalyzedTokenReadings token : text.getTokens()) {
+      sb.append(token.getToken());
+    }
+    return sb.toString();
+  }
+
+  @Override
+  public void reset() {
+    // nothing to reset
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRule.java
new file mode 100644
index 0000000..eb5a3fa
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/AccentuacioReplaceRule.java
@@ -0,0 +1,90 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ca;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.rules.AbstractSimpleReplaceRule;
+
+/**
+ * A rule that matches words or phrases which should not be used and suggests
+ * correct ones instead.
+ *
+ * Catalan implementations for accentuation errors.
+ * This is basically the same as CastellanismesReplaceRule.java
+ * with a different error message.
+ *
+ * Loads the list of words from <code>rules/ca/accentuacio.txt</code>.
+ *
+ * TODO: Some of the entries are proper names (Greek gods, etc.), which
+ * aren't currently checked.
+ *
+ * @author Jimmy O'Regan
+ *
+ * Based on pl/SimpleReplaceRule.java
+ */
+public class AccentuacioReplaceRule extends AbstractSimpleReplaceRule {
+
+ public static final String CATALAN_ACCENTUACIO_REPLACE_RULE = "CA_ACCENTUACIO_REPLACE";
+
+ private static final String FILE_NAME = "/ca/accentuacio.txt";
+ // locale used on case-conversion
+ private static final Locale CA_LOCALE = new Locale("ca");
+
+ public final String getFileName() {
+ return FILE_NAME;
+ }
+
+ public AccentuacioReplaceRule(final ResourceBundle messages) throws IOException {
+ super(messages);
+ }
+
+ public final String getId() {
+ return CATALAN_ACCENTUACIO_REPLACE_RULE;
+ }
+
+ public String getDescription() {
+ return "Errors d'accentuació";
+ }
+
+ public String getShort() {
+ return "Accentuació";
+ }
+
+ public String getSuggestion() {
+ return " es un error d'accentuació, cal dir: ";
+ }
+
+ /**
+ * use case-insensitive matching.
+ */
+ public boolean isCaseSensitive() {
+ return false;
+ }
+
+ /**
+ * locale used on case-conversion
+ */
+ public Locale getLocale() {
+ return CA_LOCALE;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRule.java
new file mode 100644
index 0000000..3169b66
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ca/CastellanismesReplaceRule.java
@@ -0,0 +1,85 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ca;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.rules.AbstractSimpleReplaceRule;
+
+/**
+ * A rule that matches words or phrases which should not be used and suggests
+ * correct ones instead.
+ *
+ * Catalan implementation for Castilianisms, kept separate for an individual
+ * error message.
+ * Loads the list of words from <code>rules/ca/castellanismes.txt</code>.
+ *
+ * @author Jimmy O'Regan
+ *
+ * Based on pl/SimpleReplaceRule.java
+ */
+public class CastellanismesReplaceRule extends AbstractSimpleReplaceRule {
+
+  public static final String CATALAN_CASTELLANISMES_REPLACE_RULE = "CA_CASTELLANISMES_REPLACE";
+
+  private static final String FILE_NAME = "/ca/castellanismes.txt";
+  // locale used on case-conversion
+  private static final Locale CA_LOCALE = new Locale("ca");
+
+  /**
+   * @return the resource path of the word list used by this rule
+   */
+  public final String getFileName() {
+    return FILE_NAME;
+  }
+
+  public CastellanismesReplaceRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+  }
+
+  public final String getId() {
+    return CATALAN_CASTELLANISMES_REPLACE_RULE;
+  }
+
+  public String getDescription() {
+    return "Barbarismes (Castellanismes)";
+  }
+
+  public String getShort() {
+    return "Castellanismes";
+  }
+
+  /**
+   * @return the Catalan message fragment for this rule; the verb form
+   *         "\u00e9s" carries an accent, unlike the pronoun "es"
+   */
+  public String getSuggestion() {
+    // \u00e9 is the accented e; written as an escape to keep this file ASCII
+    return " \u00e9s un castellanisme, cal dir: ";
+  }
+
+  /**
+   * use case-insensitive matching.
+   */
+  public boolean isCaseSensitive() {
+    return false;
+  }
+
+  /**
+   * locale used on case-conversion
+   */
+  public Locale getLocale() {
+    return CA_LOCALE;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/AgreementRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/AgreementRule.java
new file mode 100644
index 0000000..8afff0c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/AgreementRule.java
@@ -0,0 +1,405 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tagging.de.AnalyzedGermanToken;
+import de.danielnaber.languagetool.tagging.de.AnalyzedGermanTokenReadings;
+import de.danielnaber.languagetool.tagging.de.GermanTagger;
+import de.danielnaber.languagetool.tagging.de.GermanToken;
+import de.danielnaber.languagetool.tagging.de.GermanToken.POSType;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Simple agreement checker for German noun phrases. Checks agreement in:
+ *
+ * <ul>
+ * <li>DET/PRO NOUN: e.g. "mein Auto", "der Mann", "die Frau" (correct), "die Haus" (incorrect)</li>
+ * <li>DET/PRO ADJ NOUN: e.g. "der riesige Tisch" (correct), "die riesigen Tisch" (incorrect)</li>
+ * </ul>
+ *
+ * Note that this rule only checks agreement inside the noun phrase, not whether
+ * e.g. the correct case is used. For example, "Es ist das Haus dem Mann" is not
+ * detected as incorrect.
+ *
+ * @author Daniel Naber
+ */
+public class AgreementRule extends GermanRule {
+
+ // Names of the three agreement categories. makeString() leaves a category out
+ // of its result when the category's name is contained in its "omit" set.
+ private static final String KASUS = "Kasus";
+ private static final String NUMERUS = "Numerus";
+ private static final String GENUS = "Genus";
+
+ /*
+ * City names are incoherently tagged in the Morphy data. To avoid
+ * false alarms on phrases like "das Berliner Auto" we have to
+ * explicitly add these adjective readings to "Berliner" and to all
+ * other potential city names:
+ *
+ * The table lists every combination of case (NOM/GEN/DAT/AKK), number
+ * (SIN/PLU) and gender (MAS/NEU/FEM); the trailing comments are example phrases.
+ */
+ private static final String[] ADJ_READINGS = new String[] {
+ // singular:
+ "ADJ:NOM:SIN:MAS:GRU", "ADJ:NOM:SIN:NEU:GRU", "ADJ:NOM:SIN:FEM:GRU", // das Berliner Auto
+ "ADJ:GEN:SIN:MAS:GRU", "ADJ:GEN:SIN:NEU:GRU", "ADJ:GEN:SIN:FEM:GRU", // des Berliner Autos
+ "ADJ:DAT:SIN:MAS:GRU", "ADJ:DAT:SIN:NEU:GRU", "ADJ:DAT:SIN:FEM:GRU", // dem Berliner Auto
+ "ADJ:AKK:SIN:MAS:GRU", "ADJ:AKK:SIN:NEU:GRU", "ADJ:AKK:SIN:FEM:GRU", // den Berliner Bewohner
+ // plural:
+ "ADJ:NOM:PLU:MAS:GRU", "ADJ:NOM:PLU:NEU:GRU", "ADJ:NOM:PLU:FEM:GRU", // die Berliner Autos
+ "ADJ:GEN:PLU:MAS:GRU", "ADJ:GEN:PLU:NEU:GRU", "ADJ:GEN:PLU:FEM:GRU", // der Berliner Autos
+ "ADJ:DAT:PLU:MAS:GRU", "ADJ:DAT:PLU:NEU:GRU", "ADJ:DAT:PLU:FEM:GRU", // den Berliner Autos
+ "ADJ:AKK:PLU:MAS:GRU", "ADJ:AKK:PLU:NEU:GRU", "ADJ:AKK:PLU:FEM:GRU", // die Berliner Bewohner
+ };
+
+
+  /**
+   * Lowercase word forms that <code>couldBeRelativeClause</code> accepts as a
+   * possible relative pronoun.
+   */
+  private static final Set<String> REL_PRONOUN = new HashSet<String>();
+  static {
+    final String[] pronounForms = {
+      "der", "die", "das", "dessen", "deren", "dem", "den",
+      "welche", "welcher", "welchen", "welchem", "welches"
+    };
+    for (String form : pronounForms) {
+      REL_PRONOUN.add(form);
+    }
+  }
+
+  /**
+   * Lowercase prepositions that <code>couldBeRelativeClause</code> accepts
+   * between a comma and a relative pronoun.
+   */
+  private static final Set<String> PREPOSITIONS = new HashSet<String>();
+  static {
+    // TODO: add more
+    final String[] knownPrepositions = { "in", "auf", "an", "ab", "für", "zu" };
+    for (String preposition : knownPrepositions) {
+      PREPOSITIONS.add(preposition);
+    }
+  }
+
+  /**
+   * Creates the rule. The category is only set when a message bundle is given.
+   *
+   * @param messages bundle providing the "category_grammar" entry; may be null
+   */
+  public AgreementRule(final ResourceBundle messages) {
+    if (messages == null) {
+      return;
+    }
+    final String categoryName = messages.getString("category_grammar");
+    super.setCategory(new Category(categoryName));
+  }
+
+  /** @return the fixed id of this rule */
+  public String getId() {
+    return "DE_AGREEMENT";
+  }
+
+  /** @return the German description of this rule */
+  public String getDescription() {
+    return "Kongruenz von Nominalphrasen (unvollständig!), z.B. 'mein kleiner(kleines) Haus'";
+  }
+
+  /**
+   * Scans the sentence for "determiner/pronoun [adjective] noun" sequences and
+   * reports those whose parts share no common case/number/gender reading.
+   *
+   * @param text the analyzed sentence; its tokens are expected to be
+   *        {@link AnalyzedGermanTokenReadings} instances (they are cast)
+   * @return the matches found, converted with <code>toRuleMatchArray</code>
+   */
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    for (int i = 0; i < tokens.length; i++) {
+      // defaulting to the first reading
+      // TODO: check for all readings and replace GermanTokenReading
+      final String posToken = tokens[i].getAnalyzedToken(0).getPOSTag();
+      if (posToken != null && posToken.equals(JLanguageTool.SENTENCE_START_TAGNAME)) {
+        continue;
+      }
+
+      final AnalyzedGermanTokenReadings analyzedToken = (AnalyzedGermanTokenReadings) tokens[i];
+      final boolean relevantPronoun = isRelevantPronoun(tokens, i);
+
+      boolean ignore = couldBeRelativeClause(tokens, i);
+      if (i > 0) {
+        final String prevToken = tokens[i - 1].getToken().toLowerCase();
+        if ((prevToken.equals("der") || prevToken.equals("die") || prevToken.equals("das"))
+            && tokens[i].getToken().equals("eine")) {
+          // TODO: "der eine Polizist" -> do not ignore, check "der Polizist" instead
+          ignore = true;
+        }
+      }
+
+      // avoid false alarm on "nichts Gutes":
+      if (analyzedToken.getToken().equals("nichts")) {
+        ignore = true;
+      }
+
+      if ((analyzedToken.hasReadingOfType(POSType.DETERMINER) || relevantPronoun) && !ignore) {
+        int tokenPos = i + 1;
+        if (tokenPos >= tokens.length) {
+          break;
+        }
+        AnalyzedGermanTokenReadings nextToken = (AnalyzedGermanTokenReadings) tokens[tokenPos];
+        // may replace the readings of a potential city-name adjective ("Berliner")
+        nextToken = maybeAddAdjectiveReadings(nextToken, tokens, tokenPos);
+        if (nextToken.hasReadingOfType(POSType.ADJEKTIV)) {
+          tokenPos = i + 2;
+          if (tokenPos >= tokens.length) {
+            break;
+          }
+          final AnalyzedGermanTokenReadings nextNextToken = (AnalyzedGermanTokenReadings) tokens[tokenPos];
+          if (nextNextToken.hasReadingOfType(POSType.NOMEN)) {
+            // TODO: add a case (checkAdjNounAgreement) for special cases like "deren",
+            // e.g. "deren komisches Geschenke" isn't yet detected as incorrect
+            final RuleMatch ruleMatch = checkDetAdjNounAgreement(analyzedToken, nextToken, nextNextToken);
+            if (ruleMatch != null) {
+              ruleMatches.add(ruleMatch);
+            }
+          }
+        } else if (nextToken.hasReadingOfType(POSType.NOMEN)) {
+          // deliberately pass the token from the array, as the original code did
+          final RuleMatch ruleMatch = checkDetNounAgreement(analyzedToken,
+              (AnalyzedGermanTokenReadings) tokens[i + 1]);
+          if (ruleMatch != null) {
+            ruleMatches.add(ruleMatch);
+          }
+        }
+      }
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Tells whether the token at <code>pos</code> is a pronoun that should start
+   * an agreement check.
+   *
+   * @param tokens the sentence tokens
+   * @param pos index of the token to inspect
+   * @return <code>false</code> for "allem" after "vor" and for every word in
+   *         the exclusion list (compared ignoring case); otherwise whether the
+   *         token has a PRONOMEN reading
+   */
+  private boolean isRelevantPronoun(AnalyzedTokenReadings[] tokens, int pos) {
+    final AnalyzedGermanTokenReadings current = (AnalyzedGermanTokenReadings) tokens[pos];
+    final boolean hasPronounReading = current.hasReadingOfType(POSType.PRONOMEN);
+    final String word = tokens[pos].getToken();
+
+    // avoid false alarms on "vor allem":
+    if (pos > 0 && tokens[pos - 1].getToken().equalsIgnoreCase("vor") && word.equalsIgnoreCase("allem")) {
+      return false;
+    }
+
+    // pronouns that are never checked, to avoid false alarms:
+    final String[] excludedPronouns = {
+      "er", "sie", "es",
+      "ich",
+      "du",
+      "dessen",   // avoid false alarm on: "..., dessen Leiche"
+      "deren",
+      "sich",     // avoid false alarm
+      "unser",    // avoid false alarm "unser Produkt": TODO!
+      "aller",
+      "man",
+      "beiden",
+      "wessen",
+      "a",
+      "alle",
+      "etwas",    // TODO: doesn't have case -- but don't just ignore
+      "was",      // TODO: doesn't have case -- but don't just ignore
+      "wer"
+    };
+    for (String excluded : excludedPronouns) {
+      if (word.equalsIgnoreCase(excluded)) {
+        return false;
+      }
+    }
+    return hasPronounReading;
+  }
+
+  // Tagger used by the city-name heuristic below. Created once per rule
+  // instance instead of once per call of maybeAddAdjectiveReadings().
+  private final GermanTagger cityNameTagger = new GermanTagger();
+
+  /**
+   * Replaces the readings of a potential city-name adjective such as
+   * "Berliner" by all readings from ADJ_READINGS; see the comment at
+   * ADJ_READINGS.
+   *
+   * @param nextToken the token following the determiner/pronoun
+   * @param tokens the sentence tokens
+   * @param tokenPos index of <code>nextToken</code> in <code>tokens</code>
+   * @return a new token carrying the adjective readings if the heuristic
+   *         applies, otherwise <code>nextToken</code> unchanged
+   * @throws RuntimeException wrapping an IOException from the tagger lookup
+   */
+  private AnalyzedGermanTokenReadings maybeAddAdjectiveReadings(AnalyzedGermanTokenReadings nextToken,
+      AnalyzedTokenReadings[] tokens, int tokenPos) {
+    final String nextTerm = nextToken.getToken();
+    // Just a heuristic: nouns and proper nouns that end with "er" are considered
+    // city names. A following token is required.
+    if (!nextTerm.endsWith("er") || tokens.length <= tokenPos + 1) {
+      return nextToken;
+    }
+    final AnalyzedGermanTokenReadings nextNextToken = (AnalyzedGermanTokenReadings) tokens[tokenPos + 1];
+    final AnalyzedGermanTokenReadings nextATR;
+    final AnalyzedGermanTokenReadings nextNextATR;
+    try {
+      // "Berliner" -> "Berlin"
+      nextATR = cityNameTagger.lookup(nextTerm.substring(0, nextTerm.length() - 2));
+      nextNextATR = cityNameTagger.lookup(nextNextToken.getToken());
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    // Tagging in Morphy for cities is not coherent, so both proper-noun and noun
+    // readings are accepted. The parentheses spell out Java's precedence
+    // (&& binds tighter than ||) exactly as the original expression evaluated.
+    // NOTE(review): the "next token is a noun" test therefore only applies to
+    // the NOMEN alternative -- confirm that this is intended.
+    final boolean stemLooksLikeCity = nextATR != null
+        && (nextATR.hasReadingOfType(POSType.PROPER_NOUN)
+            || (nextATR.hasReadingOfType(POSType.NOMEN)
+                && nextNextATR != null && nextNextATR.hasReadingOfType(POSType.NOMEN)));
+    // "Münchner": special case as cutting off last two characters doesn't produce city name:
+    if ("Münchner".equals(nextTerm) || stemLooksLikeCity) {
+      final AnalyzedGermanToken[] adjReadings = new AnalyzedGermanToken[ADJ_READINGS.length];
+      for (int j = 0; j < ADJ_READINGS.length; j++) {
+        adjReadings[j] = new AnalyzedGermanToken(nextTerm, ADJ_READINGS[j], null);
+      }
+      nextToken = new AnalyzedGermanTokenReadings(adjReadings, nextToken.getStartPos());
+    }
+    return nextToken;
+  }
+
+  /**
+   * Heuristic check for the start of a relative clause at <code>pos</code>.
+   * TODO: improve this so it only returns true for real relative clauses
+   *
+   * @param tokens the sentence tokens
+   * @param pos index of the token that might be a relative pronoun
+   * @return <code>true</code> for "comma + relative pronoun" and for
+   *         "comma + preposition + relative pronoun" ending at <code>pos</code>
+   */
+  private boolean couldBeRelativeClause(AnalyzedTokenReadings[] tokens, int pos) {
+    if (pos < 1) {
+      return false;
+    }
+    // avoid false alarm: "Das Wahlrecht, das Frauen zugesprochen bekamen." etc:
+    final boolean commaBefore = tokens[pos - 1].getToken().equals(",");
+    final boolean isRelPronoun = REL_PRONOUN.contains(tokens[pos].getToken().toLowerCase());
+    if (commaBefore && isRelPronoun) {
+      return true;
+    }
+    if (pos < 2) {
+      return false;
+    }
+    // avoid false alarm: "Der Mann, in dem quadratische Fische schwammen."
+    final boolean commaTwoBefore = tokens[pos - 2].getToken().equals(",");
+    final boolean prepositionBefore = PREPOSITIONS.contains(tokens[pos - 1].getToken().toLowerCase());
+    return commaTwoBefore && prepositionBefore && isRelPronoun;
+  }
+
+  /**
+   * Checks a determiner/pronoun followed directly by a noun.
+   *
+   * @param token1 the determiner or pronoun
+   * @param token2 the noun
+   * @return a match spanning both tokens when their agreement categories have
+   *         nothing in common, otherwise <code>null</code>
+   */
+  private RuleMatch checkDetNounAgreement(final AnalyzedGermanTokenReadings token1,
+      final AnalyzedGermanTokenReadings token2) {
+    // avoid false alarm: "Gebt ihm Macht."
+    if (token1.getToken().equalsIgnoreCase("ihm")) {
+      return null;
+    }
+    final Set<String> detCategories = getAgreementCategories(token1);
+    if (detCategories == null) {
+      return null; // word not known, assume it's correct
+    }
+    final Set<String> nounCategories = getAgreementCategories(token2);
+    if (nounCategories == null) {
+      return null;
+    }
+    // keep only the readings both words can have
+    detCategories.retainAll(nounCategories);
+    if (!detCategories.isEmpty()) {
+      return null;
+    }
+    // TODO: better error message than just 'agreement error'
+    final String msg = "Möglicherweise fehlende Übereinstimmung (Kongruenz) zwischen Artikel und Nomen " +
+        "bezüglich Kasus, Numerus oder Genus. Beispiel: 'meine Haus' statt 'mein Haus'";
+    final int fromPos = token1.getStartPos();
+    final int toPos = token2.getStartPos() + token2.getToken().length();
+    return new RuleMatch(this, fromPos, toPos, msg);
+  }
+
+  /**
+   * Checks a determiner/pronoun followed by an adjective and a noun.
+   *
+   * @param token1 the determiner or pronoun
+   * @param token2 the adjective
+   * @param token3 the noun
+   * @return a match spanning all three tokens when they share no agreement
+   *         category; <code>null</code> when they agree or when
+   *         <code>retainCommonCategories</code> reports no result
+   */
+  private RuleMatch checkDetAdjNounAgreement(final AnalyzedGermanTokenReadings token1,
+      final AnalyzedGermanTokenReadings token2, final AnalyzedGermanTokenReadings token3) {
+    final Set<String> relax = new HashSet<String>();
+    final Set<String> set = retainCommonCategories(token1, token2, token3, relax);
+    // retainCommonCategories() is written to return null for unknown words
+    // ("assume it's correct"); guard against dereferencing that result.
+    if (set == null || set.size() > 0) {
+      return null;
+    }
+    // TODO: more detailed error message: call retainCommonCategories() again
+    // with KASUS, NUMERUS and GENUS added to "relax" one at a time; the category
+    // whose omission yields a non-empty set is the one that does not agree.
+    final String msg = "Möglicherweise fehlende Übereinstimmung (Kongruenz) zwischen Artikel, Adjektiv und " +
+        "Nomen bezüglich Kasus, Numerus oder Genus. Beispiel: 'mein kleiner Haus' " +
+        "statt 'mein kleines Haus'";
+    return new RuleMatch(this, token1.getStartPos(),
+        token3.getStartPos() + token3.getToken().length(), msg);
+  }
+
+  /**
+   * Intersects the agreement categories of three tokens.
+   *
+   * @param token1 first token
+   * @param token2 second token
+   * @param token3 third token
+   * @param relax category names to leave out of the comparison
+   * @return the categories common to all three tokens, or <code>null</code>
+   *         if no categories are available for one of them
+   */
+  private Set<String> retainCommonCategories(final AnalyzedGermanTokenReadings token1,
+      final AnalyzedGermanTokenReadings token2, final AnalyzedGermanTokenReadings token3,
+      Set<String> relax) {
+    final AnalyzedGermanTokenReadings[] phrase = { token1, token2, token3 };
+    Set<String> common = null;
+    for (AnalyzedGermanTokenReadings word : phrase) {
+      final Set<String> categories = getAgreementCategories(word, relax);
+      if (categories == null) {
+        return null; // word not known, assume it's correct
+      }
+      if (common == null) {
+        common = categories;            // the first token seeds the intersection
+      } else {
+        common.retainAll(categories);
+      }
+    }
+    return common;
+  }
+
+  /** Same as the two-argument variant with nothing omitted. */
+  private Set<String> getAgreementCategories(final AnalyzedGermanTokenReadings aToken) {
+    final Set<String> nothingOmitted = new HashSet<String>();
+    return getAgreementCategories(aToken, nothingOmitted);
+  }
+
+  /**
+   * Return Kasus, Numerus, Genus.
+   *
+   * @param aToken the token whose German readings are inspected
+   * @param omit category names that <code>makeString</code> leaves out
+   * @return one string per reading that carries at least one of the three
+   *         categories; a reading without genus is expanded to all three genera
+   */
+  private Set<String> getAgreementCategories(final AnalyzedGermanTokenReadings aToken, Set<String> omit) {
+    final Set<String> categories = new HashSet<String>();
+    for (AnalyzedGermanToken reading : aToken.getGermanReadings()) {
+      final GermanToken.Kasus casus = reading.getCasus();
+      final GermanToken.Numerus numerus = reading.getNumerus();
+      final GermanToken.Genus genus = reading.getGenus();
+      if (casus == null && numerus == null && genus == null) {
+        continue;
+      }
+      if (genus != null) {
+        categories.add(makeString(casus, numerus, genus, omit));
+        continue;
+      }
+      // "ich" and "wir" contains genus=ALG in the original data. Not sure if
+      // this is allowed, but expand this so "Ich Arbeiter" doesn't get flagged
+      // as incorrect:
+      final GermanToken.Genus[] allGenera = {
+        GermanToken.Genus.MASKULINUM, GermanToken.Genus.FEMININUM, GermanToken.Genus.NEUTRUM
+      };
+      for (GermanToken.Genus anyGenus : allGenera) {
+        categories.add(makeString(casus, numerus, anyGenus, omit));
+      }
+    }
+    return categories;
+  }
+
+  /**
+   * Builds the comparison key for one reading.
+   *
+   * @param casus case of the reading, may be null
+   * @param num number of the reading, may be null
+   * @param gen gender of the reading, may be null
+   * @param omit category names (KASUS, NUMERUS, GENUS) to leave out
+   * @return the non-null, non-omitted values in the order case, number,
+   *         gender, combined by <code>StringTools.listToString</code> with "/"
+   */
+  private String makeString(GermanToken.Kasus casus, GermanToken.Numerus num, GermanToken.Genus gen,
+      Set<String> omit) {
+    final List<String> parts = new ArrayList<String>();
+    if (casus != null && !omit.contains(KASUS)) {
+      parts.add(casus.toString());
+    }
+    if (num != null && !omit.contains(NUMERUS)) {
+      parts.add(num.toString());
+    }
+    if (gen != null && !omit.contains(GENUS)) {
+      parts.add(gen.toString());
+    }
+    return StringTools.listToString(parts, "/");
+  }
+
+ /** Empty implementation: this method performs no action. */
+ public void reset() {
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CaseRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CaseRule.java
new file mode 100644
index 0000000..663e9ff
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CaseRule.java
@@ -0,0 +1,358 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.Set;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tagging.de.AnalyzedGermanToken;
+import de.danielnaber.languagetool.tagging.de.AnalyzedGermanTokenReadings;
+import de.danielnaber.languagetool.tagging.de.GermanTagger;
+import de.danielnaber.languagetool.tagging.de.GermanToken;
+import de.danielnaber.languagetool.tagging.de.GermanToken.POSType;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Check that adjectives and verbs are not written with an uppercase
+ * first letter (except at the start of a sentence) and cases
+ * like this: <tt>Das laufen f&auml;llt mir leicht.</tt> (<tt>laufen</tt> needs
+ * to be uppercased).
+ *
+ * @author Daniel Naber
+ */
+public class CaseRule extends GermanRule {
+
+  /** Tagger used by match() to look up alternative spellings of a token. */
+  private final GermanTagger tagger = new GermanTagger();
+
+  // If a verb follows one of these words it is probably a nominalized verb
+  // and therefore has to be capitalized.
+  // Deliberately not included:
+  //   "seines" (TODO: ?), "ihres", "ihren"
+  //   "ihr" -- would cause false alarm e.g. "Auf ihr stehen die Ruinen..."
+  private static final Set<String> nounIndicators = new HashSet<String>();
+  static {
+    final String[] indicatorWords = { "das", "sein", "mein", "dein", "euer" };
+    for (String indicatorWord : indicatorWords) {
+      nounIndicators.add(indicatorWord);
+    }
+  }
+
+  // Tokens after which match() does not report an uppercase word
+  // (opening brackets, colon and quotation marks).
+  private static final Set<String> sentenceStartExceptions = new HashSet<String>();
+  static {
+    final String[] startMarkers = { "(", ":", "\"", "'", "„", "“", "«", "»" };
+    for (String startMarker : startMarkers) {
+      sentenceStartExceptions.add(startMarker);
+    }
+  }
+
+ // Words that match() never reports as wrongly capitalized. The lookup is
+ // case-sensitive, so entries are spelled exactly as they may appear in text.
+ private static final Set<String> exceptions = new HashSet<String>();
+ static {
+ exceptions.add("Für"); // "das Für und Wider"
+ exceptions.add("Wider"); // "das Für und Wider"
+ exceptions.add("Nachts"); // "des Nachts", "eines Nachts"
+ exceptions.add("Genüge");
+ exceptions.add("Zusage");
+ exceptions.add("Nachfrage");
+ exceptions.add("Sachverständiger");
+ exceptions.add("Nr");
+ exceptions.add("Sankt");
+ exceptions.add("Toter");
+ exceptions.add("Verantwortlicher");
+ exceptions.add("Wichtiges");
+ exceptions.add("Dr");
+ exceptions.add("Prof");
+ exceptions.add("Mr");
+ exceptions.add("Mrs");
+ exceptions.add("De"); // "De Morgan" etc
+ exceptions.add("Le"); // "Le Monde" etc
+ exceptions.add("Ihr");
+ exceptions.add("Ihre");
+ exceptions.add("Ihres");
+ exceptions.add("Ihren");
+ exceptions.add("Ihnen");
+ exceptions.add("Ihrem");
+ exceptions.add("Ihrer");
+ exceptions.add("Sie");
+ exceptions.add("Aus"); // "vor dem Aus stehen"
+ exceptions.add("Oder"); // the river Oder
+ exceptions.add("tun"); // "Sie müssen das tun"
+ exceptions.add("St"); // Paris St. Germain
+ exceptions.add("Las"); // Las Vegas, not a form of "lesen"
+ exceptions.add("Folgendes"); // capitalized depending on context (TODO)...
+ exceptions.add("besonderes"); // capitalized depending on context (TODO): "etwas Besonderes"
+ exceptions.add("Hundert"); // capitalized depending on context (TODO)
+ exceptions.add("Tausend"); // capitalized depending on context (TODO)
+ exceptions.add("Übrigen"); // capitalized depending on context (TODO), e.g. "im Übrigen"
+ exceptions.add("Unvorhergesehenes"); // capitalized depending on context (TODO), e.g. "etwas Unvorhergesehenes"
+
+ exceptions.add("Englisch"); // TODO: all languages
+ exceptions.add("Deutsch");
+ exceptions.add("Französisch");
+ exceptions.add("Spanisch");
+ exceptions.add("Italienisch");
+ exceptions.add("Portugiesisch");
+ exceptions.add("Dänisch");
+ exceptions.add("Norwegisch");
+ exceptions.add("Schwedisch");
+ exceptions.add("Finnisch");
+ exceptions.add("Holländisch");
+ exceptions.add("Niederländisch");
+ exceptions.add("Polnisch");
+ exceptions.add("Tschechisch");
+ exceptions.add("Arabisch");
+ exceptions.add("Persisch");
+
+ exceptions.add("Schuld");
+ exceptions.add("Erwachsener");
+ exceptions.add("Jugendlicher");
+ exceptions.add("Link");
+ exceptions.add("Ausdrücke");
+ exceptions.add("Landwirtschaft");
+ exceptions.add("Flöße");
+ exceptions.add("Wild");
+ exceptions.add("Vorsitzender");
+ exceptions.add("Mrd");
+ exceptions.add("Links");
+ // The 2006 revision of the spelling reform allows capitalization here:
+ exceptions.add("Du");
+ exceptions.add("Dir");
+ exceptions.add("Dich");
+ exceptions.add("Deine");
+ exceptions.add("Deinen");
+ exceptions.add("Deinem");
+ exceptions.add("Deines");
+ exceptions.add("Deiner");
+ exceptions.add("Euch");
+
+ exceptions.add("Neuem");
+ exceptions.add("Weitem");
+ exceptions.add("Weiteres");
+ exceptions.add("Langem");
+ exceptions.add("Längerem");
+ exceptions.add("Kurzem");
+ exceptions.add("Schwarzes"); // Schwarzes Brett
+ exceptions.add("Goldener"); // Goldener Schnitt
+ // TODO: add more exceptions here
+ }
+
+  // Multi-word phrases whose capitalized words isExceptionPhrase() accepts.
+  private static final Set<String> myExceptionPhrases = new HashSet<String>();
+  static {
+    // use proper upper/lowercase spelling here:
+    final String[] phrases = {
+      "ohne Wenn und Aber",
+      "Große Koalition",
+      "Großen Koalition",
+      "im Großen und Ganzen",
+      "Im Großen und Ganzen",
+      "im Guten wie im Schlechten",
+      "Im Guten wie im Schlechten"
+    };
+    for (String phrase : phrases) {
+      myExceptionPhrases.add(phrase);
+    }
+  }
+
+  // Lowercase words that match() does not report as nominalized verbs even
+  // when they follow a noun indicator.
+  private static final Set<String> substVerbenExceptions = new HashSet<String>();
+  static {
+    final String[] notNominalized = {
+      "gehören",
+      "bedeutet",     // "und das bedeutet..."
+      "ermöglicht",   // "und das ermöglicht..."
+      "sollen",
+      "werden",
+      "dürfen",
+      "müssen",
+      "so",
+      "ist",
+      "können",
+      "muss",
+      "muß",
+      "wollen",
+      "habe",
+      "ein",          // not the verb "einen"
+      "tun",          // "...dann wird er das tun."
+      "bestätigt",
+      "bestätigte",
+      "bestätigten",
+      "bekommen"
+    };
+    for (String word : notNominalized) {
+      substVerbenExceptions.add(word);
+    }
+  }
+
+  /**
+   * Creates the rule. The category is only set when a message bundle is given.
+   *
+   * @param messages bundle providing the "category_case" entry; may be null
+   */
+  public CaseRule(final ResourceBundle messages) {
+    if (messages == null) {
+      return;
+    }
+    final String categoryName = messages.getString("category_case");
+    super.setCategory(new Category(categoryName));
+  }
+
+  /** @return the fixed id of this rule */
+  public String getId() {
+    return "DE_CASE";
+  }
+
+  /** @return the German description of this rule */
+  public String getDescription() {
+    return "Großschreibung von Nomen und substantivierten Verben";
+  }
+
+  /**
+   * Reports two kinds of problems: lowercase words that look like nominalized
+   * verbs after a noun indicator ("das laufen"), and uppercase words that are
+   * neither nouns nor proper nouns nor listed exceptions.
+   *
+   * @param text the analyzed sentence; its tokens are expected to be
+   *        {@link AnalyzedGermanTokenReadings} instances (they are cast)
+   * @return the matches found, converted with <code>toRuleMatchArray</code>
+   * @throws RuntimeException wrapping an IOException from the tagger lookup
+   */
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+    boolean prevTokenIsDas = false;
+    for (int i = 0; i < tokens.length; i++) {
+      // FIXME: defaulting to the first analysis, don't know if it's safe
+      final String posToken = tokens[i].getAnalyzedToken(0).getPOSTag();
+      if (posToken != null && posToken.equals(JLanguageTool.SENTENCE_START_TAGNAME)) {
+        continue;
+      }
+      if (i == 1) { // don't care about first word, UppercaseSentenceStartRule does this already
+        if (nounIndicators.contains(tokens[i].getToken().toLowerCase())) {
+          prevTokenIsDas = true;
+        }
+        continue;
+      }
+      final AnalyzedGermanTokenReadings analyzedToken = (AnalyzedGermanTokenReadings) tokens[i];
+      final String token = analyzedToken.getToken();
+      List<AnalyzedGermanToken> readings = analyzedToken.getGermanReadings();
+      // tagger result for the lowercased token; fetched at most once per token
+      AnalyzedGermanTokenReadings lowercaseLookup = null;
+      boolean lowercaseLookedUp = false;
+
+      final boolean isBaseform = analyzedToken.getReadingsLength() > 1
+          && token.equals(analyzedToken.getAnalyzedToken(0).getLemma());
+      if ((readings == null || analyzedToken.getAnalyzedToken(0).getPOSTag() == null
+          || analyzedToken.hasReadingOfType(GermanToken.POSType.VERB)) && isBaseform) {
+        // no match, e.g. for "Groß": try if there's a match for the lowercased word:
+        lowercaseLookup = lookupReadings(token.toLowerCase());
+        lowercaseLookedUp = true;
+        if (lowercaseLookup != null) {
+          readings = lowercaseLookup.getGermanReadings();
+        }
+        // e.g. essen -> Essen
+        if (prevTokenIsDas && Character.isLowerCase(token.charAt(0))
+            && !substVerbenExceptions.contains(token)) {
+          final String msg = "Substantivierte Verben werden groß geschrieben.";
+          final RuleMatch ruleMatch = new RuleMatch(this, tokens[i].getStartPos(),
+              tokens[i].getStartPos() + token.length(), msg);
+          final String word = tokens[i].getToken();
+          final String fixedWord = StringTools.uppercaseFirstChar(word);
+          ruleMatch.setSuggestedReplacement(fixedWord);
+          ruleMatches.add(ruleMatch);
+        }
+      }
+      prevTokenIsDas = nounIndicators.contains(tokens[i].getToken().toLowerCase());
+      if (readings == null) {
+        continue;
+      }
+      if (analyzedToken.hasReadingOfType(GermanToken.POSType.NOMEN)) {
+        // it's the spell checker's task to check that nouns are uppercase
+        continue;
+      }
+      if (!lowercaseLookedUp) {
+        lowercaseLookup = lookupReadings(token.toLowerCase());
+      }
+      final boolean untagged = analyzedToken.getAnalyzedToken(0).getPOSTag() == null;
+      if (untagged && lowercaseLookup == null) {
+        continue;
+      }
+      if (untagged && lowercaseLookup.getAnalyzedToken(0).getPOSTag() == null) {
+        // unknown word, probably a name etc
+        continue;
+      }
+
+      // isSentenceEnd() is tested before isExceptionPhrase() on purpose: keep this order
+      if (Character.isUpperCase(token.charAt(0)) &&
+          token.length() > 1 &&     // length limit = ignore abbreviations
+          !sentenceStartExceptions.contains(tokens[i - 1].getToken()) &&
+          !StringTools.isAllUppercase(token) &&
+          !exceptions.contains(token) &&
+          !analyzedToken.hasReadingOfType(POSType.PROPER_NOUN) &&
+          !analyzedToken.isSentenceEnd() &&
+          !isExceptionPhrase(i, tokens)) {
+        final String msg = "Außer am Satzanfang werden nur Nomen und Eigennamen groß geschrieben";
+        final RuleMatch ruleMatch = new RuleMatch(this, tokens[i].getStartPos(),
+            tokens[i].getStartPos() + token.length(), msg);
+        final String word = tokens[i].getToken();
+        final String fixedWord = Character.toLowerCase(word.charAt(0)) + word.substring(1);
+        ruleMatch.setSuggestedReplacement(fixedWord);
+        ruleMatches.add(ruleMatch);
+      }
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /** Looks up <code>word</code> with the tagger, rethrowing an IOException unchecked. */
+  private AnalyzedGermanTokenReadings lookupReadings(final String word) {
+    try {
+      return tagger.lookup(word);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Tells whether the token at index <code>i</code> is part of one of the
+   * phrases in <code>myExceptionPhrases</code>.
+   *
+   * @param i index of the token to test
+   * @param tokens the sentence tokens
+   * @return <code>true</code> if the token equals some word of a phrase and the
+   *         surrounding tokens spell out that whole phrase
+   */
+  private boolean isExceptionPhrase(int i, AnalyzedTokenReadings[] tokens) {
+    // TODO: speed up?
+    final String current = tokens[i].getToken();
+    for (String phrase : myExceptionPhrases) {
+      final String[] words = phrase.split(" ");
+      for (int offset = 0; offset < words.length; offset++) {
+        if (!words[offset].equals(current)) {
+          continue;
+        }
+        // the token may be word number "offset" of the phrase: align and compare
+        final int first = i - offset;
+        final int last = first + words.length - 1;
+        if (compareLists(tokens, first, last, words)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Compares the tokens from <code>startIndex</code> to <code>endIndex</code>
+   * (inclusive) with <code>parts</code>, word by word and case-sensitively.
+   *
+   * @param tokens the sentence tokens
+   * @param startIndex index of the first token to compare
+   * @param endIndex index of the last token to compare
+   * @param parts the expected words
+   * @return <code>true</code> if every token in the range equals the word at
+   *         the same offset in <code>parts</code>; <code>false</code> on any
+   *         mismatch or if the range does not lie completely inside
+   *         <code>tokens</code>
+   */
+  private boolean compareLists(AnalyzedTokenReadings[] tokens, int startIndex, int endIndex, String[] parts) {
+    // A phrase that would start before the sentence or run past its end cannot
+    // match. Without the upper bound a phrase prefix at the end of a sentence
+    // (e.g. "... Im Großen") made the loop index past the array.
+    if (startIndex < 0 || endIndex >= tokens.length) {
+      return false;
+    }
+    int i = 0;
+    for (int j = startIndex; j <= endIndex; j++) {
+      if (i >= parts.length) {
+        return false;
+      }
+      if (!tokens[j].getToken().equals(parts[i])) {
+        return false;
+      }
+      i++;
+    }
+    return true;
+  }
+
+ /** Empty implementation: this method performs no action. */
+ public void reset() {
+ // nothing
+ }
+
+} \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CompoundRule.java
new file mode 100644
index 0000000..f180acc
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/CompoundRule.java
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.AbstractCompoundRule;
+
+/**
+ * Checks that compounds (if in the list) are not written as separate words.
+ *
+ * @author Daniel Naber
+ */
+public class CompoundRule extends AbstractCompoundRule {
+
+  /** Resource path of the German compound list. */
+  private static final String FILE_NAME = "/de/compounds.txt";
+
+  /** Character encoding passed along when the compound list is loaded. */
+  private static final String FILE_ENCODING = "UTF-8";
+
+  /**
+   * Loads the compound list and sets the short label and the three messages.
+   *
+   * @param messages message bundle handed on to the superclass constructor
+   * @throws IOException declared for the superclass constructor and the loading of the list
+   */
+  public CompoundRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    loadCompoundFile(JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILE_NAME), FILE_ENCODING);
+    super.setShort("Hyphenation problem");
+    final String withHyphen = "Dieses Kompositum wird mit Bindestrich geschrieben.";
+    final String asOneWord = "Dieses Kompositum wird zusammengeschrieben.";
+    final String eitherWay = "Dieses Kompositum wird zusammen oder mit Bindestrich geschrieben.";
+    super.setMsg(withHyphen, asOneWord, eitherWay);
+  }
+
+  /** @return the fixed id of this rule */
+  public String getId() {
+    return "DE_COMPOUNDS";
+  }
+
+  /** @return the German description of this rule */
+  public String getDescription() {
+    return "Zusammenschreibung von Komposita, z.B. 'CD-ROM' statt 'CD ROM'";
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/DashRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/DashRule.java
new file mode 100644
index 0000000..18bb670
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/DashRule.java
@@ -0,0 +1,84 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Checks that no space is inserted into hyphenated compounds (as e.g. in 'Diäten- Erhöhung').
+ *
+ * @author Daniel Naber
+ */
+public class DashRule extends GermanRule {
+  public DashRule(final ResourceBundle messages) {
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+  }
+
+  @Override
+  public String getId() {
+    return "DE_DASH";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Keine Leerzeichen in Bindestrich-Komposita (wie z.B. in 'Diäten- Erhöhung')";
+  }
+
+  /** Flags a word ending in "-" (no bare "-", "--" or "–-") followed by an uppercase-initial token. */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    String prevToken = null;
+    for (int i = 0; i < tokens.length; i++) {
+      final String token = tokens[i].getToken();
+      if (tokens[i].isWhitespace()) {
+        continue; // skipped tokens are not remembered as prevToken
+      }
+      if (prevToken != null && !prevToken.equals("-") && prevToken.indexOf("--") == -1
+          && prevToken.indexOf("–-") == -1 // first char is some special kind of dash, found in Wikipedia
+          && prevToken.endsWith("-")) {
+        if (token.length() > 0 && Character.isUpperCase(token.charAt(0))) { // empty token: nothing to test
+          final String msg = "Möglicherweise fehlt ein 'und' oder es wurde nach dem Wort " +
+              "ein überflüssiges Leerzeichen eingefügt.";
+          final RuleMatch ruleMatch = new RuleMatch(this, tokens[i-1].getStartPos(),
+              tokens[i-1].getStartPos()+prevToken.length()+1, msg);
+          ruleMatch.setSuggestedReplacement(tokens[i-1].getToken());
+          ruleMatches.add(ruleMatch);
+        }
+      }
+      prevToken = token;
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  @Override
+  public void reset() {
+    // nothing
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanLemmatizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanLemmatizer.java
new file mode 100644
index 0000000..ddcac98
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanLemmatizer.java
@@ -0,0 +1,84 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import de.danielnaber.languagetool.JLanguageTool;
+
+/**
+ * Trivial German lemmatizer that can simply find the baseforms of
+ * those fullforms listed in <code>rules/de/fullform2baseform.txt</code>.
+ *
+ * @author Daniel Naber
+ */
+class GermanLemmatizer {
+
+  private static final String FILE_NAME = "/de/fullform2baseform.txt";
+  private static final String FILE_ENCODING = "utf-8";
+
+  private final Map<String, String> fullform2baseform;
+
+  GermanLemmatizer() throws IOException {
+    final InputStream stream = JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILE_NAME);
+    fullform2baseform = loadWords(stream);
+  }
+
+  /** Returns the baseform stored for the given fullform, or null if the map has no entry for it. */
+  String getBaseform(final String fullform) {
+    return fullform2baseform.get(fullform);
+  }
+
+  /**
+   * Reads "baseform:fullform1,fullform2,..." lines into a fullform-to-baseform map.
+   * Blank lines and lines starting with '#' are skipped; any other shape is an IOException.
+   */
+  private Map<String, String> loadWords(InputStream file) throws IOException {
+    final Map<String, String> result = new HashMap<String, String>();
+    InputStreamReader streamReader = null;
+    BufferedReader lineReader = null;
+    try {
+      streamReader = new InputStreamReader(file, FILE_ENCODING);
+      lineReader = new BufferedReader(streamReader);
+      for (String raw = lineReader.readLine(); raw != null; raw = lineReader.readLine()) {
+        final String line = raw.trim();
+        if (line.length() < 1 || line.charAt(0) == '#') {
+          continue; // empty line or comment
+        }
+        final String[] parts = line.split(":");
+        if (parts.length != 2) {
+          throw new IOException("Format error in file " +JLanguageTool.getDataBroker().getFromRulesDirAsUrl(FILE_NAME)+", line: " + line);
+        }
+        for (String fullform : parts[1].split(",")) {
+          result.put(fullform.trim(), parts[0]);
+        }
+      }
+    } finally {
+      if (lineReader != null) lineReader.close();
+      if (streamReader != null) streamReader.close();
+    }
+    return result;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanRule.java
new file mode 100644
index 0000000..1fca395
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanRule.java
@@ -0,0 +1,30 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for rules for the German language.
+ *
+ * @author Daniel Naber
+ */
+public abstract class GermanRule extends Rule {
+  // Declares no members of its own; it only gives the German rules a common supertype.
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanWordRepeatRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanWordRepeatRule.java
new file mode 100644
index 0000000..55f98b4
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/GermanWordRepeatRule.java
@@ -0,0 +1,39 @@
+/*
+ * Created on 03.10.2009
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.WordRepeatRule;
+
+/**
+ * Check if a word is repeated twice, taking into account an exception
+ * for German where e.g. "..., die die ..." is often okay.
+ *
+ * @author Daniel Naber
+ */
+public class GermanWordRepeatRule extends WordRepeatRule {
+
+  public GermanWordRepeatRule(final ResourceBundle messages, final Language language) {
+    super(messages, language);
+  }
+
+  @Override
+  public String getId() {
+    return "GERMAN_WORD_REPEAT_RULE";
+  }
+
+  /** Returns true when the token two positions before {@code position} is a comma. */
+  @Override
+  public boolean ignore(final AnalyzedTokenReadings[] tokens, final int position) {
+    // A repetition right after a comma is often fine in German, e.g.
+    // "wie Honda und Samsung, die die Bezahlung ihrer Firmenchefs..."
+    if (position < 2) {
+      return false;
+    }
+    return ",".equals(tokens[position - 2].getToken());
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WiederVsWiderRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WiederVsWiderRule.java
new file mode 100644
index 0000000..ea1c2aa
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WiederVsWiderRule.java
@@ -0,0 +1,91 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Check incorrect use of "spiegelt ... wider", namely using "wieder" instead
+ * of "wider", e.g. in "Das spiegelt die Situation wieder" (incorrect).
+ *
+ * @author Daniel Naber
+ */
+public class WiederVsWiderRule extends GermanRule {
+  public WiederVsWiderRule(final ResourceBundle messages) {
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_typo")));
+    }
+  }
+
+  @Override
+  public String getId() {
+    return "DE_WIEDER_VS_WIDER";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Möglicher Tippfehler 'spiegeln ... wieder(wider)'";
+  }
+
+  /** Flags "wieder" that follows a form of "spiegeln" unless "wider" was already seen after it. */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokens();
+    int pos = 0; // offset of the current token, summed from the lengths of all tokens before it
+    boolean foundSpiegelt = false;
+    boolean foundWieder = false;
+    boolean foundWider = false;
+    for (AnalyzedTokenReadings token1 : tokens) {
+      final String token = token1.getToken();
+      if (!token.trim().equals("")) { // blank tokens only advance pos
+        if (token.equalsIgnoreCase("spiegelt") || token.equalsIgnoreCase("spiegeln") || token.equalsIgnoreCase("spiegelte")
+            || token.equalsIgnoreCase("spiegelten") || token.equalsIgnoreCase("spiegelst")) {
+          foundSpiegelt = true;
+        } else if (token.equalsIgnoreCase("wieder") && foundSpiegelt) {
+          foundWieder = true;
+        } else if (token.equalsIgnoreCase("wider") && foundSpiegelt) {
+          foundWider = true;
+        }
+        if (foundSpiegelt && foundWieder && !foundWider) {
+          final String msg = "'wider' in 'widerspiegeln' wird mit 'i' statt mit 'ie' " +
+              "geschrieben, z.B. 'Das spiegelt die Situation gut wider.'";
+          final RuleMatch ruleMatch = new RuleMatch(this, pos, pos + token.length(), msg);
+          ruleMatch.setSuggestedReplacement("wider");
+          ruleMatches.add(ruleMatch);
+          foundSpiegelt = foundWieder = foundWider = false; // start over after a match
+        }
+      }
+      pos += token.length();
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  @Override
+  public void reset() {
+    // nothing
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WordCoherencyRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WordCoherencyRule.java
new file mode 100644
index 0000000..2bba43a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/de/WordCoherencyRule.java
@@ -0,0 +1,156 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.de;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * A rule that matches words for which two different spellings are used
+ * throughout the document. Currently only implemented for German. Loads
+ * the relevant word from <code>rules/de/coherency.txt</code>.
+ *
+ * <p>Note that this should not be used for language variations like
+ * American English vs. British English or German "alte Rechtschreibung"
+ * vs. "neue Rechtschreibung" -- that's the task of a spell checker.
+ *
+ * @author Daniel Naber
+ */
+public class WordCoherencyRule extends GermanRule {
+  private static final String FILE_NAME = "/de/coherency.txt";
+  private static final String FILE_ENCODING = "utf-8";
+
+  private final Map<String, String> relevantWords; // e.g. "aufwendig -> aufwändig"
+  private Map<String, RuleMatch> shouldNotAppearWord = new HashMap<String, RuleMatch>(); // e.g. aufwändig -> RuleMatch of aufwendig
+  private final GermanLemmatizer germanLemmatizer;
+
+  public WordCoherencyRule(final ResourceBundle messages) throws IOException {
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+    relevantWords = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILE_NAME));
+    germanLemmatizer = new GermanLemmatizer();
+  }
+
+  @Override
+  public String getId() {
+    return "DE_WORD_COHERENCY";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Einheitliche Schreibweise für Wörter mit mehr als einer korrekten Schreibweise";
+  }
+
+  /** Reports a word whose alternative spelling was recorded earlier, since the last {@link #reset()}. */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokens();
+    int pos = 0;
+    for (AnalyzedTokenReadings tmpToken : tokens) {
+      //TODO: should be changed once a general German lemmatizer exists; until then the first reading is used
+      String token = tmpToken.getToken();
+      if (!tmpToken.isWhitespace()) {
+        final String origToken = token;
+        final List<AnalyzedToken> readings = tmpToken.getReadings();
+        // TODO: in theory we need to care about the other readings, too:
+        if (readings != null && readings.size() > 0) {
+          final String baseform = readings.get(0).getLemma();
+          if (baseform != null) {
+            token = baseform;
+          } else {
+            // not all words are known by the Tagger (esp. compounds), so use the file lookup:
+            final String manualLookup = germanLemmatizer.getBaseform(origToken);
+            if (manualLookup != null) {
+              token = manualLookup;
+            }
+          }
+        }
+        if (shouldNotAppearWord.containsKey(token)) {
+          final RuleMatch otherMatch = shouldNotAppearWord.get(token);
+          final String otherSpelling = otherMatch.getMessage(); // stored match carries the other spelling as its message
+          final String msg = "'" + token + "' und '" + otherSpelling +
+              "' sollten nicht gleichzeitig benutzt werden";
+          final RuleMatch ruleMatch = new RuleMatch(this, pos, pos + origToken.length(), msg);
+          ruleMatch.setSuggestedReplacement(otherSpelling);
+          ruleMatches.add(ruleMatch);
+        } else if (relevantWords.containsKey(token)) {
+          final String shouldNotAppear = relevantWords.get(token);
+          // only used to display this spelling variation if the other one really occurs:
+          final RuleMatch potentialRuleMatch = new RuleMatch(this, pos, pos + origToken.length(), token);
+          shouldNotAppearWord.put(shouldNotAppear, potentialRuleMatch);
+        }
+      }
+      pos += tmpToken.getToken().length();
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /** Reads "spelling1;spelling2" lines and maps each spelling to the other; '#' lines are skipped. */
+  private Map<String, String> loadWords(final InputStream file) throws IOException {
+    final Map<String, String> map = new HashMap<String, String>();
+    InputStreamReader isr = null;
+    BufferedReader br = null;
+    try {
+      isr = new InputStreamReader(file, FILE_ENCODING);
+      br = new BufferedReader(isr);
+      String line;
+      while ((line = br.readLine()) != null) {
+        line = line.trim();
+        if (line.length() < 1 || line.charAt(0) == '#') { // ignore empty lines and comments
+          continue;
+        }
+        final String[] parts = line.split(";");
+        if (parts.length != 2) {
+          throw new IOException("Format error in file " + JLanguageTool.getDataBroker().getFromRulesDirAsUrl(FILE_NAME) + ", line: " + line);
+        }
+        map.put(parts[0], parts[1]);
+        map.put(parts[1], parts[0]);
+      }
+    } finally {
+      if (br != null) {
+        br.close();
+      }
+      if (isr != null) {
+        isr.close();
+      }
+    }
+    return map;
+  }
+
+  @Override
+  public void reset() {
+    shouldNotAppearWord = new HashMap<String, RuleMatch>();
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/AvsAnRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/AvsAnRule.java
new file mode 100644
index 0000000..ae02ef5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/AvsAnRule.java
@@ -0,0 +1,251 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.TreeSet;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Check if the determiner (if any) preceding a word is:
+ * <ul>
+ * <li><i>an</i> if the next word starts with a vowel
+ * <li><i>a</i> if the next word does not start with a vowel
+ * </ul>
+ * This rule loads some exceptions from external files (e.g. <i>an hour</i>).
+ *
+ * @author Daniel Naber
+ */
+public class AvsAnRule extends EnglishRule {
+
+  private static final String FILENAME_A = "/en/det_a.txt";
+  private static final String FILENAME_AN = "/en/det_an.txt";
+
+  private final TreeSet<String> requiresA; // exception words loaded from FILENAME_A
+  private final TreeSet<String> requiresAn; // exception words loaded from FILENAME_AN
+
+  public AvsAnRule(final ResourceBundle messages) throws IOException {
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+    requiresA = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILENAME_A));
+    requiresAn = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILENAME_AN));
+  }
+
+  @Override
+  public String getId() {
+    return "EN_A_VS_AN";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Use of 'a' vs. 'an'";
+  }
+
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    String prevToken = "";
+    int prevPos = 0;
+    //ignoring token 0, i.e., SENT_START
+    for (int i = 1; i < tokens.length; i++) {
+      String token = tokens[i].getToken();
+      boolean doesRequireA = false;
+      boolean doesRequireAn = false;
+      // check for exceptions:
+      boolean isException = false;
+      final String[] parts = token.split("[-']"); // for example, in "one-way" only "one" is relevant
+      if (parts.length >= 1 &&
+          !parts[0].equalsIgnoreCase("a")) { // avoid false alarm on "A-levels are..."
+        token = parts[0];
+      }
+      token = token.replaceAll("[^a-zA-Z0-9\\.']", ""); // e.g. >>an "industry party"<<
+      if (StringTools.isEmpty(token)) {
+        continue;
+      }
+      final char tokenFirstChar = token.charAt(0);
+      if (requiresA.contains(token.toLowerCase()) || requiresA.contains(token)) {
+        isException = true;
+        doesRequireA = true;
+      }
+      if (requiresAn.contains(token.toLowerCase()) || requiresAn.contains(token)) {
+        if (isException) {
+          throw new IllegalStateException(token + " is listed in both det_a.txt and det_an.txt");
+        }
+        isException = true;
+        doesRequireAn = true;
+      }
+
+      if (!isException) {
+        if (StringTools.isAllUppercase(token) || StringTools.isMixedCase(token)) {
+          // we don't know how all-uppercase and mixed case words (often abbreviations) are pronounced,
+          // so never complain about these:
+          doesRequireAn = false;
+          doesRequireA = false;
+        } else if (isVowel(tokenFirstChar)) {
+          doesRequireAn = true;
+        } else {
+          doesRequireA = true;
+        }
+      }
+      // a match is reported on the preceding article (prevPos/prevToken), not on the current token
+      String msg = null;
+      if (prevToken.equalsIgnoreCase("a") && doesRequireAn) {
+        String replacement = "an";
+        if (prevToken.equals("A")) {
+          replacement = "An";
+        }
+        msg = "Use <suggestion>" +replacement+ "</suggestion> instead of '" +prevToken+ "' if the following "+
+            "word starts with a vowel sound, e.g. 'an article', "
+            + "'an hour'";
+      } else if (prevToken.equalsIgnoreCase("an") && doesRequireA) {
+        String replacement = "a";
+        if (prevToken.equals("An")) {
+          replacement = "A";
+        }
+        msg = "Use <suggestion>" +replacement+ "</suggestion> instead of '" +prevToken+ "' if the following "+
+            "word doesn't start with a vowel sound, e.g. 'a sentence', "
+            + "'a university'";
+      }
+      if (msg != null) {
+        final RuleMatch ruleMatch = new RuleMatch(this, prevPos, prevPos+prevToken.length(), msg, "Wrong article");
+        ruleMatches.add(ruleMatch);
+      }
+      if (tokens[i].hasPosTag("DT")) { // only tokens tagged DT are remembered as a candidate article
+        prevToken = token;
+        prevPos = tokens[i].getStartPos();
+      } else {
+        prevToken = "";
+      }
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Adds "a" or "an" to the English noun.
+   * Used for suggesting the proper form of the
+   * indefinite article.
+   * @param noun Word that needs an article.
+   * @return String containing the word with a determiner,
+   * or just the word if the word is an abbreviation.
+   */
+  public final String suggestAorAn(final String noun) {
+    String word = noun;
+    boolean doesRequireA = false;
+    boolean doesRequireAn = false;
+    // check for exceptions:
+    boolean isException = false;
+    final String[] parts = word.split("[-']"); // for example, in "one-way" only "one" is relevant
+    if (parts.length >= 1 &&
+        !parts[0].equalsIgnoreCase("a")) { // avoid false alarm on "A-levels are..."
+      word = parts[0];
+    }
+    //html entities!
+    word = word.replaceAll("&quot|&amp|&lt|&gt|[^a-zA-Z0-9]", ""); // e.g. >>an "industry party"<<
+    if (StringTools.isEmpty(word)) {
+      return word; // nothing left after cleaning: the cleaned (empty) word is returned, not noun
+    }
+    final char tokenFirstChar = word.charAt(0);
+    if (requiresA.contains(word.toLowerCase()) || requiresA.contains(word)) {
+      isException = true;
+      doesRequireA = true;
+    }
+    if (requiresAn.contains(word.toLowerCase()) || requiresAn.contains(word)) {
+      if (isException) {
+        throw new IllegalStateException(word + " is listed in both det_a.txt and det_an.txt");
+      }
+      isException = true;
+      doesRequireAn = true;
+    }
+    if (!isException) {
+      if (StringTools.isAllUppercase(word) || StringTools.isMixedCase(word)) {
+        // we don't know how all-uppercase and mixed case words (often abbreviations) are pronounced,
+        // so never add an article to these:
+        doesRequireAn = false;
+        doesRequireA = false;
+      } else if (isVowel(tokenFirstChar)) {
+        doesRequireAn = true;
+      } else {
+        doesRequireA = true;
+      }
+    }
+    if (doesRequireA) {
+      return "a " + noun;
+    } else if (doesRequireAn) {
+      return "an " + noun;
+    } else {
+      return noun;
+    }
+  }
+
+  private static boolean isVowel(char c) {
+    c = Character.toLowerCase(c);
+    return c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u';
+  }
+
+  /**
+   * Load words as UTF-8; lowercased, except that a leading '*' is dropped and the case is kept.
+   */
+  private TreeSet<String> loadWords(final InputStream file) throws IOException {
+    BufferedReader br = null;
+    final TreeSet<String> set = new TreeSet<String>();
+    try {
+      br = new BufferedReader(new InputStreamReader(file, "utf-8")); // explicit charset, not the platform default
+      String line;
+      while ((line = br.readLine()) != null) {
+        line = line.trim();
+        if (line.length() < 1) {
+          continue;
+        }
+        if (line.charAt(0) == '#') {
+          continue;
+        }
+        if (line.charAt(0) == '*') {
+          set.add(line.substring(1));
+        } else {
+          set.add(line.toLowerCase());
+        }
+      }
+    } finally {
+      if (br != null) {
+        br.close();
+      }
+    }
+    return set;
+  }
+
+  @Override
+  public void reset() {
+    // nothing
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/CompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/CompoundRule.java
new file mode 100644
index 0000000..0e01523
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/CompoundRule.java
@@ -0,0 +1,55 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.en;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.AbstractCompoundRule;
+
+/**
+ * Checks that compounds (if in the list) are not written as separate words.
+ *
+ * @author Marcin Miłkowski, based on code by Daniel Naber
+ */
+
+public class CompoundRule extends AbstractCompoundRule {
+
+  private static final String FILE_NAME = "/en/compounds.txt"; // compound list, read as UTF-8 below
+
+  public CompoundRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    loadCompoundFile(JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILE_NAME), "UTF-8");
+    super.setShort("Hyphenation problem");
+    super.setMsg("This word is normally spelled with hyphen.",
+        "This word is normally spelled as one.",
+        "This expression is normally spelled as one or with hyphen.");
+  }
+
+  @Override
+  public String getId() {
+    return "EN_COMPOUNDS";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Hyphenated words, e.g., 'case-sensitive' instead of 'case sensitive'";
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishRule.java
new file mode 100644
index 0000000..cd0036d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishRule.java
@@ -0,0 +1,30 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.en;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for rules for the English language.
+ *
+ * @author Daniel Naber
+ */
+public abstract class EnglishRule extends Rule {
+  // Declares no members of its own; it only gives the English rules a common supertype.
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRule.java
new file mode 100644
index 0000000..4b32c05
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/en/EnglishUnpairedBracketsRule.java
@@ -0,0 +1,89 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Daniel Naber (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.en;
+
+import java.util.ResourceBundle;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.GenericUnpairedBracketsRule;
+
+public class EnglishUnpairedBracketsRule extends GenericUnpairedBracketsRule {
+
+ private static final String[] EN_START_SYMBOLS = { "[", "(", "{", "“", "\"", "'" };
+ private static final String[] EN_END_SYMBOLS = { "]", ")", "}", "”", "\"", "'" };
+
+ private static final Pattern NUMBER = Pattern.compile("\\d+");
+
+ /**
+ * Creates the rule and assigns the English start/end symbol tables
+ * ({@code EN_START_SYMBOLS} / {@code EN_END_SYMBOLS}) after the superclass
+ * constructor has run.
+ *
+ * @param messages passed through to the superclass constructor
+ * @param language passed through to the superclass constructor
+ */
+ public EnglishUnpairedBracketsRule(final ResourceBundle messages,
+ final Language language) {
+ super(messages, language);
+ startSymbols = EN_START_SYMBOLS;
+ endSymbols = EN_END_SYMBOLS;
+ }
+
+ /** Returns the fixed identifier of this rule, {@code "EN_UNPAIRED_BRACKETS"}. */
+ public String getId() {
+ return "EN_UNPAIRED_BRACKETS";
+ }
+
+ protected boolean isNoException(final String token,
+ final AnalyzedTokenReadings[] tokens, final int i, final int j, final boolean precSpace,
+ final boolean follSpace) {
+
+
+//TODO: add an', o', 'till, 'tain't, 'cept, 'fore in the disambiguator
+//and mark up as contractions somehow
+// add exception for dates like '52
+
+ if (i <= 1) {
+ return true;
+ }
+
+ if (!precSpace && follSpace) {
+ // exception for English inches, e.g., 20"
+ if ("\"".equals(token)
+ && NUMBER.matcher(tokens[i - 1].getToken()).matches()) {
+ return false;
+ }
+ // Exception for English plural Saxon genetive
+ // current disambiguation scheme is a bit too greedy
+ // for adjectives
+ if ("'".equals(token) && tokens[i].hasPosTag("POS")) {
+ return false;
+ }
+ // puttin' on the Ritz
+ if ("'".equals(token) && tokens[i - 1].hasPosTag("VBG")
+ && tokens[i - 1].getToken().endsWith("in")) {
+ return false;
+ }
+ }
+ if (precSpace && !follSpace) {
+ // hold 'em!
+ if ("'".equals(token) && i + 1 < tokens.length
+ && "em".equals(tokens[i + 1].getToken())) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/ElwithFemRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/ElwithFemRule.java
new file mode 100644
index 0000000..c22b9a3
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/ElwithFemRule.java
@@ -0,0 +1,179 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.es;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.TreeSet;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Check if the determiner (if any) preceding a feminine noun is "el". This
+ * rule loads a list of words (feminine nouns starting with stressed ha- or a-)
+ * from an external file. These words enforce the use of 'el' as determiner
+ * instead of 'la' (also with 'un', 'algun' and 'ningun').
+ *
+ * Sample
+ *
+ * *la alma -> el alma
+ * *la hambre -> el hambre
+ *
+ * http://blog.lengua-e.com/2007/el-arma-determinante-masculino-ante-nombre-femenino/
+ * http://tinyurl.com/m9uzte
+ *
+ *
+ * @author Susana Sotelo Docio
+ *
+ * based on English AvsAnRule rule
+ */
+public class ElwithFemRule extends SpanishRule {
+
+ private static final String FILENAME_EL = "/es/el.txt";
+ private final TreeSet<String> requiresEl;
+
+ /**
+ * Creates the rule and loads the word list named by {@code FILENAME_EL}
+ * from the rules directory.
+ *
+ * @param messages bundle used to look up the {@code category_misc} label;
+ * when {@code null} no category is set
+ * @throws IOException if reading the word list fails
+ */
+ public ElwithFemRule(final ResourceBundle messages) throws IOException {
+ if (messages != null) {
+ super.setCategory(new Category(messages.getString("category_misc")));
+ }
+ requiresEl = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILENAME_EL));
+ }
+
+ /** Returns the fixed identifier of this rule, {@code "EL_WITH_FEM"}. */
+ @Override
+ public String getId() {
+ return "EL_WITH_FEM";
+ }
+
+ /** Returns the fixed Spanish-language description text of this rule. */
+ @Override
+ public String getDescription() {
+ return "Uso de 'el' con sustantivos femeninos que comienzan por a- o ha- t\u00f3nicas";
+ }
+
+ @Override
+ public RuleMatch[] match(final AnalyzedSentence text) {
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+ String prevToken = "";
+ int prevPos = 0;
+ //ignoring token 0, i.e., SENT_START
+ for (int i = 1; i < tokens.length; i++) {
+ String token = tokens[i].getToken();
+ boolean doesRequireEl = false;
+
+ token = token.replaceAll("[^a-záéíóúñüA-ZÁÉÍÓÚÑÜ0-9\\.']", ""); // el 'alma'
+ if (StringTools.isEmpty(token)) {
+ continue;
+ }
+ if (requiresEl.contains(token.toLowerCase()) || requiresEl.contains(token)) {
+ doesRequireEl = true;
+ }
+
+ // FIXME: temporal solution for "La Haya" (change)
+ if (prevToken.equals("La") && token.equals("Haya")) {
+ doesRequireEl = false;
+ }
+
+ String msg = null;
+ String replacement = null;
+ if (prevToken.equalsIgnoreCase("la") && doesRequireEl)
+ {
+ replacement = "el";
+ if (prevToken.equals("La")) { replacement = "El"; }
+ }
+ else if (prevToken.equalsIgnoreCase("una") && doesRequireEl)
+ {
+ replacement = "un";
+ if (prevToken.equals("Una")) { replacement = "Un"; }
+ }
+ else if (prevToken.equalsIgnoreCase("alguna") && doesRequireEl)
+ {
+ replacement = "alg\u00fan";
+ if (prevToken.equals("Alguna")) { replacement = "Alg\u00fan"; }
+ }
+ else if (prevToken.equalsIgnoreCase("ninguna") && doesRequireEl)
+ {
+ replacement = "ning\u00fan";
+ if (prevToken.equals("Ninguna")) { replacement = "Ning\u00fan"; }
+ }
+
+ msg = "Use <suggestion>" +replacement+ "</suggestion> en lugar de '" +prevToken+ "' si la siguiente "+
+ "palabra comienza por 'a' o 'ha' t\u00f3nicas, por ejemplo 'el hampa', "
+ + "'un agua'";
+
+
+ if (replacement != null) {
+ final RuleMatch ruleMatch = new RuleMatch(this, prevPos, prevPos+prevToken.length(), msg, "Art\u00edculo incorrecto");
+ ruleMatches.add(ruleMatch);
+ }
+ if (tokens[i].hasPosTag("DA0FS0") || tokens[i].hasPosTag("DI0FS0") ) {
+ prevToken = token;
+ prevPos = tokens[i].getStartPos();
+ } else {
+ prevToken = "";
+ }
+ }
+ return toRuleMatchArray(ruleMatches);
+ }
+
+  /**
+   * Reads the word list, one entry per line, decoding the stream as UTF-8.
+   * Lines are trimmed; empty lines and lines starting with '#' are skipped.
+   * An entry starting with '*' is stored without the asterisk and with its
+   * case unchanged; every other entry is stored in lowercase.
+   *
+   * @param file the stream to read; it is closed (through the reader) before returning
+   * @return the sorted set of entries
+   * @throws IOException if reading fails
+   */
+  private TreeSet<String> loadWords(final InputStream file) throws IOException {
+    final TreeSet<String> words = new TreeSet<String>();
+    BufferedReader reader = null;
+    try {
+      reader = new BufferedReader(new InputStreamReader(file, "utf-8"));
+      for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
+        final String entry = raw.trim();
+        if (entry.length() < 1 || entry.charAt(0) == '#') {
+          continue;
+        }
+        words.add(entry.charAt(0) == '*' ? entry.substring(1) : entry.toLowerCase());
+      }
+    } finally {
+      if (reader != null) {
+        reader.close();
+      }
+    }
+    return words;
+  }
+
+ /** Does nothing. */
+ @Override
+ public void reset() {
+ // nothing
+ }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/SpanishRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/SpanishRule.java
new file mode 100644
index 0000000..4aaa297
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/es/SpanishRule.java
@@ -0,0 +1,32 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.es;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for rules for Spanish.
+ * It declares no members of its own and only extends {@link Rule}.
+ *
+ * @author Susana Sotelo Docio
+ *
+ * based on English rules
+ */
+public abstract class SpanishRule extends Rule {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/FrenchRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/FrenchRule.java
new file mode 100644
index 0000000..2ad4bcc
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/FrenchRule.java
@@ -0,0 +1,31 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.fr;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for French rules.
+ * It declares no members of its own and only extends {@link Rule}.
+ *
+ * @author Marcin Milkowski
+ */
+public abstract class FrenchRule extends Rule {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRule.java
new file mode 100644
index 0000000..4c03049
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/fr/QuestionWhitespaceRule.java
@@ -0,0 +1,161 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.fr;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A rule that matches spaces before ?,:,; and ! (required for correct French
+ * punctuation).
+ *
+ * @author Marcin Miłkowski
+ */
+public class QuestionWhitespaceRule extends FrenchRule {
+
+  /**
+   * Creates the rule and, when a message bundle is available, assigns the
+   * category named by the {@code category_misc} key.
+   *
+   * @param messages bundle used to look up the category label; may be
+   *        {@code null}, in which case no category is set
+   */
+  public QuestionWhitespaceRule(final ResourceBundle messages) {
+    // Tolerate a missing bundle instead of failing with a
+    // NullPointerException, as ElwithFemRule does.
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+  }
+
+ /** Returns the fixed identifier of this rule, {@code "FRENCH_WHITESPACE"}. */
+ @Override
+ public String getId() {
+ return "FRENCH_WHITESPACE";
+ }
+
+ /** Returns the fixed French-language description text of this rule. */
+ @Override
+ public String getDescription() {
+ return "Insertion des espaces fines insécables";
+ }
+
+  /**
+   * Flags '?', '!', ';', ':' and '»' when they are preceded by whitespace or
+   * directly follow a token other than U+00A0, and flags '«' when the next
+   * token is not U+00A0. A '?' directly after '!' and a '!' directly after
+   * '?' are accepted. Each match suggests the same text with a non-breaking
+   * space (U+00A0) inserted.
+   *
+   * The match always starts at the previous token; {@code fixLen} extends it
+   * past the end of that token.
+   *
+   * @param text the analyzed sentence to check (whitespace tokens included)
+   * @return the matches found, possibly none
+   */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    // U+00A0 written as an escape so that it cannot be mistaken for, or
+    // silently converted to, an ordinary space.
+    final String nbsp = "\u00a0";
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokens();
+    String prevToken = "";
+    for (int i = 1; i < tokens.length; i++) {
+      final String token = tokens[i].getToken();
+      final boolean isWhiteBefore = tokens[i].isWhitespaceBefore();
+      String msg = null;
+      // number of characters after the previous token that the match covers
+      int fixLen = 0;
+      String suggestionText = null;
+      if (isWhiteBefore) {
+        // previous token is whitespace: replace it and the punctuation mark
+        if (token.equals("?")) {
+          msg = "Point d'interrogation est précédé d'une espace fine insécable.";
+          suggestionText = nbsp + "?";
+          fixLen = 1;
+        } else if (token.equals("!")) {
+          msg = "Point d'exclamation est précédé d'une espace fine insécable.";
+          suggestionText = nbsp + "!";
+          fixLen = 1;
+        } else if (token.equals("»")) {
+          msg = "Le guillemet fermant est précédé d'une espace fine insécable.";
+          suggestionText = nbsp + "»";
+          fixLen = 1;
+        } else if (token.equals(";")) {
+          msg = "Point-virgule est précédé d'une espace fine insécable.";
+          suggestionText = nbsp + ";";
+          fixLen = 1;
+        } else if (token.equals(":")) {
+          msg = "Deux-points sont précédé d'une espace fine insécable.";
+          suggestionText = nbsp + ":";
+          fixLen = 1;
+        }
+      } else {
+        // punctuation glued to the previous token: keep that token and
+        // insert the non-breaking space between the two
+        if (token.equals("?") && !prevToken.equals("!")
+            && !prevToken.equals(nbsp)) {
+          msg = "Point d'interrogation est précédé d'une espace fine insécable.";
+          suggestionText = prevToken + nbsp + "?";
+          fixLen = 1;
+        } else if (token.equals("!") && !prevToken.equals("?")
+            && !prevToken.equals(nbsp)) {
+          msg = "Point d'exclamation est précédé d'une espace fine insécable.";
+          suggestionText = prevToken + nbsp + "!";
+          fixLen = 1;
+        } else if (token.equals(";") && !prevToken.equals(nbsp)) {
+          msg = "Point-virgule est précédé d'une espace fine insécable.";
+          suggestionText = prevToken + nbsp + ";";
+          fixLen = 1;
+        } else if (token.equals(":") && !prevToken.equals(nbsp)) {
+          msg = "Deux-points précédés d'une espace fine insécable.";
+          suggestionText = prevToken + nbsp + ":";
+          fixLen = 1;
+        } else if (token.equals("»") && !prevToken.equals(nbsp)) {
+          msg = "Le guillemet fermant est précédé d'une espace fine insécable.";
+          suggestionText = prevToken + nbsp + "»";
+          fixLen = 1;
+        }
+      }
+
+      // Opening guillemet: overrides anything set above for this token.
+      if (StringTools.isEmpty(token) && prevToken.equals("«")) {
+        msg = "Le guillemet ouvrant est suivi d'une espace fine insécable.";
+        suggestionText = "«" + nbsp;
+        fixLen = 1;
+      } else if (!StringTools.isEmpty(token) && !token.equals(nbsp)
+          && prevToken.equals("«")) {
+        msg = "Le guillemet ouvrant est suivi d'une espace fine insécable.";
+        suggestionText = "«" + nbsp;
+        fixLen = 0;
+      }
+
+      if (msg != null) {
+        final int fromPos = tokens[i - 1].getStartPos();
+        final int toPos = fromPos + fixLen + tokens[i - 1].getToken().length();
+        final RuleMatch ruleMatch = new RuleMatch(this, fromPos, toPos, msg,
+            "Insérer un espace insécable");
+        if (suggestionText != null) {
+          ruleMatch.setSuggestedReplacement(suggestionText);
+        }
+        ruleMatches.add(ruleMatch);
+      }
+      prevToken = token;
+    }
+
+    return toRuleMatchArray(ruleMatches);
+  }
+
+ /** Does nothing. */
+ @Override
+ public void reset() {
+ // nothing
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/AbstractPatternRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/AbstractPatternRule.java
new file mode 100644
index 0000000..d172134
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/AbstractPatternRule.java
@@ -0,0 +1,223 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2008 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * An Abstract Pattern Rule that describes a pattern of words or part-of-speech tags
+ * used for PatternRule and DisambiguationPatternRule.
+ *
+ * Introduced to minimize code duplication between those classes.
+ *
+ * @author Marcin Miłkowski
+ */
+
+public abstract class AbstractPatternRule extends Rule {
+
+ private final String id;
+
+ private final String description;
+
+ protected final List<Element> patternElements;
+
+ protected Unifier unifier;
+
+ protected final Language language;
+
+ protected int startPositionCorrection;
+
+ protected int endPositionCorrection;
+
+ protected boolean prevMatched;
+
+ protected final boolean testUnification;
+
+ private final boolean getUnified;
+
+ private boolean groupsOrUnification;
+
+ protected AnalyzedTokenReadings[] unifiedTokens;
+
+ protected final boolean sentStart;
+
+  /**
+   * Creates the rule from a copy of the given pattern elements and derives
+   * the flags used during matching.
+   *
+   * @param id the rule identifier returned by {@link #getId()}
+   * @param description the text returned by {@link #getDescription()}
+   * @param language the language, which also supplies the unifier
+   * @param elements the pattern elements; the list is copied and its first
+   *        element is read, so it must not be empty
+   * @param getUnified whether unified tokens are collected during
+   *        unification tests
+   */
+  public AbstractPatternRule(final String id,
+      final String description,
+      final Language language,
+      final List<Element> elements,
+      boolean getUnified) {
+    this.id = id;
+    this.description = description;
+    this.getUnified = getUnified;
+    this.patternElements = new ArrayList<Element>(elements); // copy elements
+    this.language = language;
+    this.unifier = language.getUnifier();
+    this.testUnification = initUnifier();
+    this.sentStart = patternElements.get(0).isSentStart();
+
+    // Readings need the extra group/unification pass if any element is
+    // unified or any element carries an AND group.
+    boolean needsGroupPass = testUnification;
+    if (!needsGroupPass) {
+      for (final Element candidate : patternElements) {
+        if (candidate.hasAndGroup()) {
+          needsGroupPass = true;
+          break;
+        }
+      }
+    }
+    this.groupsOrUnification = needsGroupPass;
+  }
+
+  /**
+   * Tells whether unification has to be tested for this rule.
+   *
+   * @return {@code true} if at least one pattern element is unified
+   */
+  private boolean initUnifier() {
+    boolean anyUnified = false;
+    for (int idx = 0; idx < patternElements.size() && !anyUnified; idx++) {
+      anyUnified = patternElements.get(idx).isUnified();
+    }
+    return anyUnified;
+  }
+
+ /** Returns the id, the pattern elements and the description, separated by colons. */
+ @Override
+ public final String toString() {
+ return id + ":" + patternElements + ":" + description;
+ }
+
+ /** Returns the description passed to the constructor. */
+ @Override
+ public String getDescription() {
+ return description;
+ }
+
+ /** Returns the identifier passed to the constructor. */
+ @Override
+ public String getId() {
+ return id;
+ }
+
+  /**
+   * Default implementation that reports no matches; subclasses that
+   * produce matches are expected to override it.
+   *
+   * @param text the analyzed sentence (ignored here)
+   * @return an empty array, never {@code null}, so callers can iterate
+   *         over the result without a null check
+   * @throws IOException never thrown by this implementation
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence text) throws IOException {
+    return new RuleMatch[0];
+  }
+
+ /** Does nothing in this class. */
+ @Override
+ public void reset() {
+ // intentionally empty
+ }
+
+ /**
+ * Stores the start position correction in the field of the same name.
+ *
+ * @param startPositionCorrection the value to store
+ */
+ public final void setStartPositionCorrection(final int startPositionCorrection) {
+ this.startPositionCorrection = startPositionCorrection;
+ }
+
+ /**
+ * Stores the end position correction in the field of the same name.
+ *
+ * @param endPositionCorrection the value to store
+ */
+ public final void setEndPositionCorrection(final int endPositionCorrection) {
+ this.endPositionCorrection = endPositionCorrection;
+ }
+
+
+  /**
+   * Prepares the AND group of an element: resolves every group member that
+   * is a reference element, then resets the element's group state. Does
+   * nothing if the element has no AND group.
+   *
+   * @param firstMatchToken index of the first matched token, used to
+   *        resolve references
+   * @param elem the element whose AND group is prepared
+   * @param tokens the tokens of the sentence
+   * @throws IOException if resolving a reference fails
+   */
+  protected void setupAndGroup(final int firstMatchToken,
+      final Element elem, final AnalyzedTokenReadings[] tokens)
+      throws IOException {
+    if (!elem.hasAndGroup()) {
+      return;
+    }
+    for (final Element member : elem.getAndGroup()) {
+      if (member.isReferenceElement()) {
+        setupRef(firstMatchToken, member, tokens);
+      }
+    }
+    elem.setupAndGroup();
+  }
+
+  //TODO: add .compile for all exceptions of the element?
+  /**
+   * Compiles a reference element against the token it refers to. The
+   * referenced index is {@code firstMatchToken} plus the element's token
+   * reference; nothing happens if the element is not a reference element or
+   * that index lies at or beyond the end of {@code tokens}.
+   *
+   * @param firstMatchToken index of the first matched token
+   * @param elem the element to compile
+   * @param tokens the tokens of the sentence
+   * @throws IOException if compiling the element fails
+   */
+  protected void setupRef(final int firstMatchToken, final Element elem,
+      final AnalyzedTokenReadings[] tokens) throws IOException {
+    if (!elem.isReferenceElement()) {
+      return;
+    }
+    final int referencedIndex = firstMatchToken + elem.getMatch().getTokenRef();
+    if (referencedIndex >= tokens.length) {
+      return;
+    }
+    elem.compile(tokens[referencedIndex], language.getSynthesizer());
+  }
+
+ /**
+ * Tests the element against every reading of the token at {@code tokenNo}.
+ *
+ * @param tokens the tokens of the sentence
+ * @param elem the pattern element to test
+ * @param prevElement the preceding pattern element, may be {@code null}
+ * @param tokenNo index of the token whose readings are tested
+ * @param firstMatchToken index of the first matched token, used to set up
+ * references in the element's AND group
+ * @param prevSkipNext a value greater than zero enables the check of the
+ * previous element's scope-"next" exceptions
+ * @return {@code true} if the element matched and no exception vetoed it
+ * @throws IOException if setting up the AND group fails
+ */
+ protected boolean testAllReadings(final AnalyzedTokenReadings[] tokens,
+ final Element elem, final Element prevElement, final int tokenNo,
+ final int firstMatchToken, final int prevSkipNext) throws IOException {
+ boolean thisMatched = false;
+ final int numberOfReadings = tokens[tokenNo].getReadingsLength();
+ setupAndGroup(firstMatchToken, elem, tokens);
+ for (int l = 0; l < numberOfReadings; l++) {
+ final AnalyzedToken matchToken = tokens[tokenNo].getAnalyzedToken(l);
+ // && binds tighter than ||: prevMatched stays true once set, otherwise it
+ // becomes true when a scope-"next" exception of the previous element
+ // matches this reading.
+ prevMatched = prevMatched || prevSkipNext > 0 && prevElement != null
+ && prevElement.isMatchedByScopeNextException(matchToken);
+ if (prevMatched) {
+ return false;
+ }
+ // a match on any reading is enough
+ thisMatched = thisMatched || elem.isMatched(matchToken);
+ if (!thisMatched && !elem.isInflected() && elem.getPOStag() == null
+ && (prevElement != null && prevElement.getExceptionList() == null)) {
+ return false; // the token is the same, we will not get a match
+ }
+ if (groupsOrUnification) {
+ // the second argument tells the callee whether this is the last reading
+ thisMatched &= testUnificationAndGroups(thisMatched,
+ l + 1 == numberOfReadings, matchToken, elem);
+ }
+ }
+ if (thisMatched) {
+ // an exception matching any reading vetoes the match
+ for (int l = 0; l < numberOfReadings; l++) {
+ if (elem.isExceptionMatchedCompletely(tokens[tokenNo].getAnalyzedToken(l)))
+ return false;
+ }
+ // so does a scope-"previous" exception matching the preceding token
+ if (tokenNo > 0 && elem.hasPreviousException()) {
+ if (elem.isMatchedByPreviousException(tokens[tokenNo - 1]))
+ return false;
+ }
+ }
+ return thisMatched;
+ }
+
+ /**
+ * Combines the match result for one reading with the unification test and
+ * the element's AND group.
+ *
+ * @param matched whether the element matched so far
+ * @param lastReading whether {@code matchToken} is the last reading of the
+ * token; only then is the AND group evaluated
+ * @param matchToken the reading being tested
+ * @param elem the pattern element being tested
+ * @return the combined result
+ */
+ protected boolean testUnificationAndGroups(final boolean matched,
+ final boolean lastReading, final AnalyzedToken matchToken,
+ final Element elem) {
+ boolean thisMatched = matched;
+ if (testUnification) {
+ if (matched && elem.isUnified()) {
+ thisMatched = thisMatched && unifier.isUnified(matchToken, elem.getUniFeatures(),
+ elem.isUniNegated(), lastReading);
+ }
+ if (thisMatched && getUnified) {
+ unifiedTokens = unifier.getFinalUnified();
+ }
+ // an element outside unification resets the unifier
+ if (!elem.isUnified()) {
+ unifier.reset();
+ }
+ }
+ // the reading is recorded for the AND group regardless of the result so far
+ elem.addMemberAndGroup(matchToken);
+ if (lastReading) {
+ thisMatched &= elem.checkAndGroup(thisMatched);
+ }
+ return thisMatched;
+ }
+
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Element.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Element.java
new file mode 100644
index 0000000..0ad7c1f
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Element.java
@@ -0,0 +1,803 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A part of a pattern.
+ *
+ * @author Daniel Naber
+ */
+public class Element {
+
+ private String stringToken;
+ private String posToken;
+ private String regToken;
+ private boolean posRegExp;
+
+ private boolean negation;
+ private boolean posNegation;
+
+ private final boolean caseSensitive;
+ private final boolean stringRegExp;
+ private boolean inflected;
+
+ private boolean testWhitespace;
+ private boolean whitespaceBefore;
+
+ /**
+ * List of exceptions that are valid for the current token and / or some next
+ * tokens.
+ */
+ private List<Element> exceptionList;
+
+ /**
+ * True if scope=="next".
+ */
+ private boolean exceptionValidNext;
+
+ /**
+ * True if any exception with a scope=="current" or scope=="next" is set for
+ * the element.
+ */
+ private boolean exceptionSet;
+
+ /**
+ * True if attribute scope=="previous".
+ */
+ private boolean exceptionValidPrevious;
+
+ /**
+ * List of exceptions that are valid for a previous token.
+ */
+ private List<Element> previousExceptionList;
+
+ private List<Element> andGroupList;
+ private boolean andGroupSet;
+ private boolean[] andGroupCheck;
+
+ private int skip;
+
+ private Pattern p;
+ private Pattern pPos;
+
+ private Matcher m;
+ private Matcher mPos;
+
+ /** The reference to another element in the pattern. **/
+ private Match tokenReference;
+
+ /**
+ * True when the element stores a formatted reference to another element of
+ * the pattern.
+ */
+ private boolean containsMatches;
+
+ /** Matches only tokens without any POS tag. **/
+ private static final String UNKNOWN_TAG = "UNKNOWN";
+
+ /**
+ * Parameter passed to regular expression matcher to enable case insensitive
+ * Unicode matching.
+ */
+ private static final String CASE_INSENSITIVE = "(?iu)";
+
+ private String referenceString;
+
+ /** String ID of the phrase the element is in. **/
+ private String phraseName;
+
+ /**
+ * This var is used to determine if calling {@link #setStringElement} makes
+ * sense. This method takes most time so it's best to reduce the number of its
+ * calls.
+ **/
+ private boolean testString;
+
+ /**
+ * Tells if the element is inside the unification, so that {@link Unifier}
+ * tests it.
+ */
+ private boolean unified;
+ private boolean uniNegation;
+
+ private Map<String, List<String>> unificationFeatures;
+
+ /**
+ * Creates Element that is used to match tokens in the text.
+ * The flags are stored in fields and the token string is passed to
+ * {@link #setStringElement}.
+ *
+ * @param token
+ * String to be matched
+ * @param caseSensitive
+ * True if the check is case-sensitive.
+ * @param regExp
+ * True if the check uses regular expressions.
+ * @param inflected
+ * True if the check refers to base forms (lemmas).
+ */
+ public Element(final String token, final boolean caseSensitive,
+ final boolean regExp, final boolean inflected) {
+ this.caseSensitive = caseSensitive;
+ this.stringRegExp = regExp;
+ this.inflected = inflected;
+ setStringElement(token);
+ }
+
+  /**
+   * Checks whether the rule element matches the token given as a parameter.
+   * When the element has an AND group, the result is also recorded in the
+   * first slot of the group's check array.
+   *
+   * @param token
+   *          {@link AnalyzedToken} to check matching against
+   * @return True if token matches, false otherwise.
+   */
+  public final boolean isMatched(final AnalyzedToken token) {
+    if (testWhitespace && !isWhitespaceBefore(token)) {
+      return false;
+    }
+    // Without a string test only the negation flag decides the string part.
+    final boolean stringPartOk =
+        testString ? (isStringTokenMatched(token) ^ negation) : !negation;
+    final boolean result = stringPartOk && (isPosTokenMatched(token) ^ posNegation);
+
+    if (andGroupSet) {
+      andGroupCheck[0] |= result;
+    }
+    return result;
+  }
+
+  /**
+   * Checks whether an exception matches. Exceptions with scope "next" are
+   * not considered here.
+   *
+   * @param token
+   *          {@link AnalyzedToken} to check matching against
+   * @return True if any of the exceptions matches (logical disjunction).
+   */
+  public final boolean isExceptionMatched(final AnalyzedToken token) {
+    if (!exceptionSet) {
+      return false;
+    }
+    for (final Element candidate : exceptionList) {
+      if (!candidate.exceptionValidNext && candidate.isMatched(token)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Enables testing multiple conditions specified by different elements.
+   * Doesn't test exceptions.
+   *
+   * Each AND-group member that has not matched yet is tested against the
+   * token and, on success, marked in the check array (member {@code i} uses
+   * slot {@code i + 1}). Works as logical AND operator only if preceded with
+   * {@link #setupAndGroup()}, and followed by {@link #checkAndGroup(boolean)}.
+   *
+   * @param token
+   *          AnalyzedToken - the token checked.
+   */
+  public final void addMemberAndGroup(final AnalyzedToken token) {
+    if (!andGroupSet) {
+      return;
+    }
+    for (int member = 0; member < andGroupList.size(); member++) {
+      final int slot = member + 1;
+      if (andGroupCheck[slot]) {
+        continue; // already satisfied by an earlier token
+      }
+      if (andGroupList.get(member).isMatched(token)) {
+        andGroupCheck[slot] = true;
+      }
+    }
+  }
+
+  /**
+   * Resets the AND-group bookkeeping: allocates a check array with one slot
+   * for this element plus one per group member, all {@code false}. Does
+   * nothing if the element has no AND group.
+   */
+  public final void setupAndGroup() {
+    if (!andGroupSet) {
+      return;
+    }
+    // a freshly allocated boolean array already holds false in every slot
+    andGroupCheck = new boolean[andGroupList.size() + 1];
+  }
+
+  /**
+   * Evaluates the AND group.
+   *
+   * @param previousValue the result to return when the element has no AND
+   *          group
+   * @return {@code true} if every slot of the check array is set; without an
+   *         AND group, {@code previousValue}
+   */
+  public final boolean checkAndGroup(final boolean previousValue) {
+    if (!andGroupSet) {
+      return previousValue;
+    }
+    for (final boolean satisfied : andGroupCheck) {
+      if (!satisfied) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Tests the exceptions of the elements in the AND group against a token.
+   *
+   * Note that the members are combined as a disjunction: one member whose
+   * exception matches is enough.
+   *
+   * @param token
+   *          AnalyzedToken - the token checked for exceptions.
+   * @return true if the exception of at least one AND-group member matches,
+   *         false otherwise or if there is no AND group.
+   */
+  public final boolean isAndExceptionGroupMatched(final AnalyzedToken token) {
+    if (!andGroupSet) {
+      return false;
+    }
+    for (final Element member : andGroupList) {
+      if (member.isExceptionMatched(token)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ /**
+ * This method checks exceptions both of the element itself and of its
+ * AND group. Introduced for clarity.
+ *
+ * @param token
+ * Token to match
+ * @return True if matched.
+ */
+ public final boolean isExceptionMatchedCompletely(final AnalyzedToken token) {
+ // note: short-circuiting possible
+ return isExceptionMatched(token) || isAndExceptionGroupMatched(token);
+ }
+
+  /**
+   * Adds an element to this element's AND group, creating the group on
+   * first use. A {@code null} argument is ignored.
+   *
+   * @param andToken the element to add, may be {@code null}
+   */
+  public final void setAndGroupElement(final Element andToken) {
+    if (andToken == null) {
+      return;
+    }
+    if (andGroupList == null) {
+      andGroupList = new ArrayList<Element>();
+    }
+    andGroupSet = true;
+    andGroupList.add(andToken);
+  }
+
+ /**
+ * Checks if this element has an AND group associated with it.
+ *
+ * @return true if the element has a group of elements that all should match.
+ */
+ public final boolean hasAndGroup() {
+ return andGroupSet;
+ }
+
+ /**
+ * Returns the group of elements linked with AND operator.
+ *
+ * @return List of Elements.
+ */
+ public final List<Element> getAndGroup() {
+ return andGroupList;
+ }
+
+ /**
+  * Checks whether a previously set exception matches (in case the exception
+  * had scope == "next").
+  *
+  * @param token
+  *          {@link AnalyzedToken} to check matching against.
+  * @return True if any of the exceptions matches.
+  */
+ public final boolean isMatchedByScopeNextException(final AnalyzedToken token) {
+   if (!exceptionSet) {
+     return false;
+   }
+   for (final Element candidate : exceptionList) {
+     // Only exceptions flagged as valid for the next token are consulted.
+     if (candidate.exceptionValidNext && candidate.isMatched(token)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Checks whether an exception for a previous token matches (in case the
+  * exception had scope == "previous").
+  *
+  * @param token
+  *          {@link AnalyzedToken} to check matching against.
+  * @return True if any of the exceptions matches.
+  */
+ public final boolean isMatchedByPreviousException(final AnalyzedToken token) {
+   if (!exceptionValidPrevious) {
+     return false;
+   }
+   for (final Element candidate : previousExceptionList) {
+     // Exceptions that are also flagged as valid for the next token are
+     // skipped here.
+     if (!candidate.exceptionValidNext && candidate.isMatched(token)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Checks whether an exception for a previous token matches any reading of a
+  * given token (in case the exception had scope == "previous").
+  *
+  * @param prevToken
+  *          {@link AnalyzedTokenReadings} to check matching against.
+  * @return true if at least one reading is matched by one of the exceptions.
+  */
+ public final boolean isMatchedByPreviousException(
+     final AnalyzedTokenReadings prevToken) {
+   final int readingCount = prevToken.getReadingsLength();
+   boolean anyReadingMatched = false;
+   // Stops at the first reading for which the single-token check succeeds.
+   for (int r = 0; r < readingCount && !anyReadingMatched; r++) {
+     anyReadingMatched = isMatchedByPreviousException(prevToken
+         .getAnalyzedToken(r));
+   }
+   return anyReadingMatched;
+ }
+
+ /**
+ * Checks if the token is a SENT_START.
+ *
+ * @return True if the element starts the sentence and the element hasn't been
+ * set to have negated POS token.
+ *
+ */
+ public final boolean isSentStart() {
+ return JLanguageTool.SENTENCE_START_TAGNAME.equals(posToken)
+ && !posNegation;
+ }
+
+ /**
+  * Builds a short textual form of the element: an optional leading '!' for
+  * negation, the string token, the phrase name in braces (if any) and the
+  * POS token after a slash (if any).
+  */
+ @Override
+ public final String toString() {
+   final StringBuilder text = new StringBuilder();
+   if (negation) {
+     text.append('!');
+   }
+   text.append(stringToken);
+   if (phraseName != null) {
+     text.append(" {").append(phraseName).append('}');
+   }
+   if (posToken != null) {
+     text.append('/').append(posToken);
+   }
+   return text.toString();
+ }
+
+ /**
+ * Sets the part-of-speech (POS) condition of this element.
+ *
+ * @param posToken
+ * The POS tag to match, or a regular expression over POS tags when
+ * <code>regExp</code> is true.
+ * @param regExp
+ * True if <code>posToken</code> is to be compiled as a regular
+ * expression.
+ * @param negation
+ * Stored as the POS negation flag of this element.
+ */
+ public final void setPosElement(final String posToken, final boolean regExp,
+ final boolean negation) {
+ this.posToken = posToken;
+ this.posNegation = negation;
+ posRegExp = regExp;
+ if (posRegExp) {
+ pPos = Pattern.compile(posToken);
+ }
+ }
+
+ public final String getString() {
+ return stringToken;
+ }
+
+ /**
+  * Sets the string this element is matched against and, for regular
+  * expression elements, compiles the corresponding pattern.
+  *
+  * @param token
+  *          The string (or regular expression) to match; an empty value
+  *          switches string testing off.
+  */
+ public final void setStringElement(final String token) {
+   stringToken = token;
+   testString = !StringTools.isEmpty(token);
+   if (!(testString && stringRegExp)) {
+     return;
+   }
+   // Case-insensitive matching is requested by prefixing the expression.
+   regToken = caseSensitive ? token : CASE_INSENSITIVE + token;
+   // The literal token \0 is stored but never compiled.
+   if (!"\\0".equals(token)) {
+     p = Pattern.compile(regToken);
+   }
+ }
+
+ /**
+ * Sets a POS-type exception for matching string tokens.
+ *
+ * @param posToken
+ * The part of the speech tag in the exception.
+ * @param regExp
+ * True if the POS is specified as a regular expression.
+ * @param negation
+ * True if the exception is negated.
+ * @param scopeNext
+ * True if the exception scope is next tokens.
+ * @param scopePrevious
+ * True if the exception should match only a single previous token.
+ */
+ public final void setPosException(final String posToken,
+ final boolean regExp, final boolean negation, final boolean scopeNext,
+ final boolean scopePrevious) {
+ final Element posException = new Element("", this.caseSensitive, false,
+ false);
+ posException.setPosElement(posToken, regExp, negation);
+ posException.exceptionValidNext = scopeNext;
+ setException(posException, scopePrevious);
+ }
+
+ /**
+ * Sets a string-type exception for matching string tokens.
+ *
+ * @param token
+ * The string in the exception.
+ * @param regExp
+ * True if the string is specified as a regular expression.
+ * @param inflected
+ * True if the string is a base form (lemma).
+ * @param negation
+ * True if the exception is negated.
+ * @param scopeNext
+ * True if the exception scope is next tokens.
+ * @param scopePrevious
+ * True if the exception should match only a single previous token.
+ */
+ public final void setStringException(final String token,
+ final boolean regExp, final boolean inflected, final boolean negation,
+ final boolean scopeNext, final boolean scopePrevious) {
+ final Element stringException = new Element(token, this.caseSensitive,
+ regExp, inflected);
+ stringException.setNegation(negation);
+ stringException.exceptionValidNext = scopeNext;
+ setException(stringException, scopePrevious);
+ }
+
+ /**
+  * Registers an exception element, either for the previous token or for the
+  * regular exception list, creating the target list on first use.
+  *
+  * @param elem
+  *          The exception to store.
+  * @param scopePrevious
+  *          True if the exception applies to the previous token.
+  */
+ private void setException(final Element elem, final boolean scopePrevious) {
+   exceptionValidPrevious |= scopePrevious;
+   if (scopePrevious) {
+     if (previousExceptionList == null) {
+       previousExceptionList = new ArrayList<Element>();
+     }
+     previousExceptionList.add(elem);
+     return;
+   }
+   if (exceptionList == null) {
+     exceptionList = new ArrayList<Element>();
+   }
+   exceptionSet = true;
+   exceptionList.add(elem);
+ }
+
+ /**
+  * Tests if the part of speech of a token matches this element's POS
+  * condition.
+  *
+  * The special value UNKNOWN_TAG matches tokens whose POS tag is null.
+  *
+  * @param token
+  *          Token to test.
+  * @return true if no POS condition is set or if the condition matches.
+  */
+ private boolean isPosTokenMatched(final AnalyzedToken token) {
+   // Without a POS condition every token is accepted.
+   if (posToken == null) {
+     return true;
+   }
+   final String tag = token.getPOSTag();
+   if (tag == null && !posRegExp) {
+     return UNKNOWN_TAG.equals(posToken);
+   }
+   boolean matched;
+   if (posRegExp) {
+     // An untagged token is matched as if it carried UNKNOWN_TAG.
+     final String subject = tag == null ? UNKNOWN_TAG : tag;
+     if (mPos == null) {
+       mPos = pPos.matcher(subject);
+     } else {
+       mPos.reset(subject);
+     }
+     matched = mPos.matches();
+     if (tag == null) {
+       return matched;
+     }
+   } else {
+     matched = posToken.equals(tag);
+   }
+   if (!matched && UNKNOWN_TAG.equals(posToken)) {
+     // Sentence-end and paragraph-end tags are helper tags and count as
+     // unknown, too.
+     matched = JLanguageTool.SENTENCE_END_TAGNAME.equals(tag)
+         || JLanguageTool.PARAGRAPH_END_TAGNAME.equals(tag);
+   }
+   return matched;
+ }
+
+ /**
+  * Tests whether the string condition of this element matches a given token.
+  * Depending on the element, the comparison is a regular expression match, a
+  * case-sensitive comparison or a case-insensitive comparison.
+  *
+  * @param token
+  *          {@link AnalyzedToken} to match against.
+  * @return True if matches.
+  */
+ private boolean isStringTokenMatched(final AnalyzedToken token) {
+   final String candidate = getTestToken(token);
+   if (!stringRegExp) {
+     return caseSensitive ? stringToken.equals(candidate) : stringToken
+         .equalsIgnoreCase(candidate);
+   }
+   // The matcher is created once and re-used for later tokens.
+   if (m == null) {
+     m = p.matcher(candidate);
+   } else {
+     m.reset(candidate);
+   }
+   return m.matches();
+ }
+
+ /**
+  * Picks the form of the token that the string condition is compared with:
+  * the inflected form for inflected elements, the plain token otherwise.
+  * This enables using words with lemmas and without lemmas in the same
+  * regexp with inflected="yes".
+  */
+ private String getTestToken(final AnalyzedToken token) {
+   return inflected ? token.getTokenInflected() : token.getToken();
+ }
+
+ /**
+ * Gets the exception scope length.
+ *
+ * @return Scope length.
+ */
+ public final int getSkipNext() {
+ return skip;
+ }
+
+ /**
+ * Sets the exception scope length.
+ *
+ * @param i
+ * Exception scope length.
+ */
+ public final void setSkipNext(final int i) {
+ skip = i;
+ }
+
+ /**
+ * Checks if the element has an exception for a previous token.
+ *
+ * @return True if the element has a previous token matching exception.
+ */
+ public final boolean hasPreviousException() {
+ return exceptionValidPrevious;
+ }
+
+ /**
+ * Negates the meaning of match().
+ *
+ * @param negation
+ * - true if the meaning of match() is to be negated.
+ */
+ public final void setNegation(final boolean negation) {
+ this.negation = negation;
+ }
+
+ /**
+ * see {@link #setNegation}
+ *
+ * @since 0.9.3
+ */
+ public final boolean getNegation() {
+ return this.negation;
+ }
+
+ /**
+ *
+ * @return true when this element refers to another token.
+ */
+ public final boolean isReferenceElement() {
+ return containsMatches;
+ }
+
+ /**
+ * Sets the reference to another token.
+ *
+ * @param match
+ * Formatting object for the token reference.
+ */
+ public final void setMatch(final Match match) {
+ tokenReference = match;
+ containsMatches = true;
+ }
+
+ public final Match getMatch() {
+ return tokenReference;
+ }
+
+ /**
+ * Prepare Element for matching by formatting its string token and POS (if the
+ * Element is supposed to refer to some other token).
+ *
+ * The cached string pattern and matcher are discarded first, then the token
+ * reference is bound to the given token and synthesizer, and finally the
+ * string (and possibly POS) condition of this element is rebuilt from the
+ * reference.
+ *
+ * @param token
+ * the token specified as {@link AnalyzedTokenReadings}
+ * @param synth
+ * the language synthesizer ({@link Synthesizer})
+ * @throws IOException
+ * declared by this method; presumably raised while the token reference is
+ * formatted - TODO confirm against Match.
+ *
+ */
+ public final void compile(final AnalyzedTokenReadings token,
+ final Synthesizer synth) throws IOException {
+
+ // drop the cached matcher and pattern; setStringElement() below rebuilds
+ // the pattern where applicable
+ m = null;
+ p = null;
+ tokenReference.setToken(token);
+ tokenReference.setSynthesizer(synth);
+
+ // remember the original string the first time, so that the reference
+ // placeholder can be substituted again on every later call
+ if (StringTools.isEmpty(referenceString)) {
+ referenceString = stringToken;
+ }
+ if (tokenReference.setsPos()) {
+ final String posReference = tokenReference.getTargetPosTag();
+ if (posReference != null) {
+ if (mPos != null) {
+ mPos = null;
+ }
+ // NOTE(review): the element's string negation flag is passed as the POS
+ // negation here - confirm this is intended.
+ setPosElement(posReference, tokenReference.posRegExp(), negation);
+ }
+ // the reference supplies only the POS: the placeholder is removed from
+ // the string
+ setStringElement(referenceString.replace("\\"
+ + tokenReference.getTokenRef(), ""));
+ inflected = true;
+ } else {
+ // the reference supplies the string: the placeholder is replaced by the
+ // formatted token
+ setStringElement(referenceString.replace("\\"
+ + tokenReference.getTokenRef(), tokenReference.toTokenString()));
+ }
+ }
+
+ /**
+ * Sets the phrase the element is in.
+ *
+ * @param s
+ * ID of the phrase.
+ */
+ public final void setPhraseName(final String s) {
+ phraseName = s;
+ }
+
+ /**
+ * Checks if the Element is in any phrase.
+ *
+ * @return True if the Element is contained in the phrase.
+ */
+ public final boolean isPartOfPhrase() {
+ return phraseName != null;
+ }
+
+ /**
+ * Whether the element matches case sensitively.
+ *
+ * @since 0.9.3
+ */
+ public final boolean getCaseSensitive() {
+ return caseSensitive;
+ }
+
+ /**
+ * Tests whether the element matches a regular expression.
+ *
+ * @since 0.9.6
+ */
+ public final boolean isRegularExpression() {
+ return stringRegExp;
+ }
+
+ /**
+ * @return the POS of the Element
+ * @since 0.9.6
+ */
+ public final String getPOStag() {
+ return posToken;
+ }
+
+ /**
+ * Tests whether the POS is negated.
+ *
+ * @return true if so.
+ */
+ public final boolean getPOSNegation() {
+ return posNegation;
+ }
+
+ /**
+ * Whether the token is inflected.
+ *
+ * @return True if so.
+ */
+ public final boolean isInflected() {
+ return inflected;
+ }
+
+ /**
+ * Gets the phrase the element is in.
+ *
+ * @return String The name of the phrase.
+ */
+ public final String getPhraseName() {
+ return phraseName;
+ }
+
+ public final boolean isUnified() {
+ return unified;
+ }
+
+ /**
+ * Stores the unification features of this element and marks the element as
+ * unified.
+ *
+ * @param uniFeatures
+ * A map from features to a list of types.
+ */
+ public final void setUnification(final Map<String, List<String>> uniFeatures) {
+ unificationFeatures = uniFeatures;
+ unified = true;
+ }
+
+ /**
+ * Get unification features and types.
+ * @return A map from features to a list of types.
+ * @since 1.0.1
+ */
+ public final Map<String, List<String>> getUniFeatures() {
+ return unificationFeatures;
+ }
+
+ public final void setUniNegation() {
+ uniNegation = true;
+ }
+
+ public final boolean isUniNegated() {
+ return uniNegation;
+ }
+
+ /**
+ * Sets the whitespace condition of this element and switches whitespace
+ * testing on.
+ *
+ * @param isWhite
+ * The value that {@link #isWhitespaceBefore(AnalyzedToken)} later compares
+ * with the token's own whitespace-before flag.
+ */
+ public final void setWhitespaceBefore(final boolean isWhite) {
+ whitespaceBefore = isWhite;
+ testWhitespace = true;
+ }
+
+ /**
+  * Sets the whitespace condition on the exception that was added last to the
+  * regular exception list (exceptions with scope "previous" are kept in a
+  * separate list and are not affected). Does nothing when no such exception
+  * exists.
+  *
+  * @param isWhite
+  *          Whitespace condition passed on to the exception's
+  *          {@link #setWhitespaceBefore(boolean)}.
+  */
+ public final void setExceptionSpaceBefore(final boolean isWhite) {
+   if (exceptionList != null && !exceptionList.isEmpty()) {
+     // The most recent exception sits at size() - 1; get(size()) is always
+     // out of range and used to throw IndexOutOfBoundsException.
+     exceptionList.get(exceptionList.size() - 1).setWhitespaceBefore(isWhite);
+   }
+ }
+
+ /**
+ * Compares the whitespace condition of this element with a token.
+ *
+ * @param token
+ * Token whose whitespace-before flag is read.
+ * @return True if the token's flag equals the value set with
+ * {@link #setWhitespaceBefore(boolean)}.
+ */
+ public final boolean isWhitespaceBefore(final AnalyzedToken token) {
+ return whitespaceBefore == token.isWhitespaceBefore();
+ }
+
+ /**
+ * Returns the exceptions of this element whose scope is not "previous".
+ * Used for testing.
+ *
+ * @return A List of Exceptions, as stored internally (not a copy).
+ * @since 1.0.0
+ */
+ public final List<Element> getExceptionList() {
+ return exceptionList;
+ }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleLoader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleLoader.java
new file mode 100644
index 0000000..94c6515
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/FalseFriendRuleLoader.java
@@ -0,0 +1,356 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.MessageFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.ResourceBundle;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Loads {@link PatternRule}s from a false friends XML file.
+ *
+ * @author Daniel Naber
+ */
+public class FalseFriendRuleLoader extends DefaultHandler {
+
+ public FalseFriendRuleLoader() {
+ }
+
+ /**
+  * Parses a false-friends XML stream and returns the resulting rules. For
+  * every rule that has suggestions recorded under its id, the formatted
+  * suggestions are appended to the rule message.
+  *
+  * @param file
+  *          Stream with the false-friends XML.
+  * @param textLanguage
+  *          Language of the checked text.
+  * @param motherTongue
+  *          Native language of the user; also selects the locale of the
+  *          messages.
+  * @return The loaded rules.
+  */
+ public final List<PatternRule> getRules(final InputStream file,
+     final Language textLanguage, final Language motherTongue)
+     throws ParserConfigurationException, SAXException, IOException {
+   final FalseFriendRuleHandler ruleHandler = new FalseFriendRuleHandler(
+       textLanguage, motherTongue);
+   final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
+   // Do not load the external DTD while parsing.
+   parser.getXMLReader().setFeature(
+       "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+       false);
+   parser.parse(file, ruleHandler);
+   final List<PatternRule> loadedRules = ruleHandler.getRules();
+   // Add suggestions to each rule:
+   final ResourceBundle bundle = ResourceBundle.getBundle(
+       "de.danielnaber.languagetool.MessagesBundle", motherTongue.getLocale());
+   final Map<String, List<String>> suggestionsById = ruleHandler
+       .getSuggestionMap();
+   for (final PatternRule rule : loadedRules) {
+     final List<String> ruleSuggestions = suggestionsById.get(rule.getId());
+     if (ruleSuggestions == null) {
+       continue;
+     }
+     final MessageFormat suggestionFormat = new MessageFormat(bundle
+         .getString("false_friend_suggestion"));
+     final Object[] formatArgs = { formatSuggestions(ruleSuggestions) };
+     rule.setMessage(rule.getMessage() + " "
+         + suggestionFormat.format(formatArgs));
+   }
+   return loadedRules;
+ }
+
+ /**
+  * Wraps every entry in a <code>suggestion</code> element and joins the
+  * results with ", ".
+  *
+  * @param l
+  *          Suggestions to format.
+  * @return The joined markup; empty for an empty list.
+  */
+ private String formatSuggestions(final List<String> l) {
+   final StringBuilder out = new StringBuilder();
+   String separator = "";
+   for (final String suggestion : l) {
+     out.append(separator);
+     out.append("<suggestion>").append(suggestion).append("</suggestion>");
+     separator = ", ";
+   }
+   return out.toString();
+ }
+
+ /**
+  * Testing only: loads the false-friend rules for both directions of the
+  * English/German language pair and prints them.
+  *
+  * Declared <code>static</code> so that it can serve as a program entry
+  * point; as an instance method it could not be launched by the JVM.
+  *
+  * @param args
+  *          Ignored.
+  */
+ public static void main(final String[] args)
+     throws ParserConfigurationException, SAXException, IOException {
+   final FalseFriendRuleLoader prg = new FalseFriendRuleLoader();
+   List<PatternRule> l = prg.getRules(JLanguageTool.getDataBroker()
+       .getFromRulesDirAsStream("/false-friends.xml"), Language.ENGLISH,
+       Language.GERMAN);
+   System.out.println("Hints for German native speakers:");
+   for (final PatternRule rule : l) {
+     System.out.println(rule);
+   }
+   System.out.println("=======================================");
+   System.out.println("Hints for English native speakers:");
+   l = prg.getRules(JLanguageTool.getDataBroker()
+       .getFromRulesDirAsStream("/false-friends.xml"),
+       Language.GERMAN, Language.ENGLISH);
+   for (final PatternRule rule : l) {
+     System.out.println(rule);
+   }
+ }
+
+}
+
+class FalseFriendRuleHandler extends XMLRuleHandler {
+
+ private final ResourceBundle messages;
+ private final MessageFormat formatter;
+
+ private final Language textLanguage;
+ private final Language motherTongue;
+
+ private boolean defaultOff;
+
+ private Language language;
+ private Language translationLanguage;
+ private Language currentTranslationLanguage;
+ private List<StringBuilder> translations = new ArrayList<StringBuilder>();
+ private StringBuilder translation = new StringBuilder();
+ private final List<String> suggestions = new ArrayList<String>();
+ // rule ID -> list of translations:
+ private final Map<String, List<String>> suggestionMap = new HashMap<String, List<String>>();
+
+ private boolean inTranslation;
+
+ public FalseFriendRuleHandler(final Language textLanguage,
+ final Language motherTongue) {
+ messages = ResourceBundle.getBundle(
+ "de.danielnaber.languagetool.MessagesBundle", motherTongue.getLocale());
+ formatter = new MessageFormat("");
+ formatter.setLocale(motherTongue.getLocale());
+ this.textLanguage = textLanguage;
+ this.motherTongue = motherTongue;
+ }
+
+ public Map<String, List<String>> getSuggestionMap() {
+ return suggestionMap;
+ }
+
+ // ===========================================================
+ // SAX DocumentHandler methods
+ // ===========================================================
+
+ /**
+  * Handles the opening tag of an element of the false-friends XML and
+  * records its attributes in the handler state.
+  *
+  * @throws SAXException
+  *           if a pattern names a language that is not known.
+  */
+ @Override
+ public void startElement(final String namespaceURI, final String lName,
+     final String qName, final Attributes attrs) throws SAXException {
+   if (qName.equals("rule")) {
+     translations = new ArrayList<StringBuilder>();
+     id = attrs.getValue("id");
+     // A rule group that is switched off keeps all of its rules off.
+     if (!(inRuleGroup && defaultOff)) {
+       defaultOff = "off".equals(attrs.getValue("default"));
+     }
+     // Rules without an id of their own take the id of their group.
+     if (inRuleGroup && id == null) {
+       id = ruleGroupId;
+     }
+     correctExamples = new ArrayList<String>();
+     incorrectExamples = new ArrayList<IncorrectExample>();
+   } else if (qName.equals("pattern")) {
+     inPattern = true;
+     final String languageStr = attrs.getValue("lang");
+     language = Language.getLanguageForShortName(languageStr);
+     if (language == null) {
+       throw new SAXException("Unknown language '" + languageStr + "'");
+     }
+   } else if (qName.equals("exception")) {
+     inException = true;
+     exceptions = new StringBuilder();
+
+     // Each flag is only overwritten when its attribute is present.
+     if (attrs.getValue(NEGATE) != null) {
+       exceptionStringNegation = attrs.getValue(NEGATE).equals(YES);
+     }
+     if (attrs.getValue(SCOPE) != null) {
+       exceptionValidNext = attrs.getValue(SCOPE).equals("next");
+       exceptionValidPrev = attrs.getValue(SCOPE).equals("previous");
+     }
+     if (attrs.getValue(INFLECTED) != null) {
+       exceptionStringInflected = attrs.getValue(INFLECTED).equals(YES);
+     }
+     if (attrs.getValue(POSTAG) != null) {
+       exceptionPosToken = attrs.getValue(POSTAG);
+       if (attrs.getValue(POSTAG_REGEXP) != null) {
+         exceptionPosRegExp = attrs.getValue(POSTAG_REGEXP).equals(YES);
+       }
+       if (attrs.getValue(NEGATE_POS) != null) {
+         exceptionPosNegation = attrs.getValue(NEGATE_POS).equals(YES);
+       }
+     }
+     if (attrs.getValue(REGEXP) != null) {
+       exceptionStringRegExp = attrs.getValue(REGEXP).equals(YES);
+     }
+
+   } else if (qName.equals(TOKEN)) {
+     setToken(attrs);
+   } else if (qName.equals("translation")) {
+     inTranslation = true;
+     final Language tmpLang = Language.getLanguageForShortName(attrs
+         .getValue("lang"));
+     currentTranslationLanguage = tmpLang;
+     // Translations in other (or unknown) languages are not an error here.
+     // The former "Unknown language" check inside this branch could never
+     // fire: a language equal to the non-null mother tongue is never null.
+     if (tmpLang == motherTongue) {
+       translationLanguage = tmpLang;
+     }
+   } else if (qName.equals(EXAMPLE)
+       && "correct".equals(attrs.getValue(TYPE))) {
+     // Constant-first comparison: an example without a type attribute is
+     // ignored instead of causing a NullPointerException.
+     inCorrectExample = true;
+     correctExample = new StringBuilder();
+   } else if (qName.equals(EXAMPLE)
+       && "incorrect".equals(attrs.getValue(TYPE))) {
+     inIncorrectExample = true;
+     incorrectExample = new StringBuilder();
+   } else if (qName.equals("message")) {
+     inMessage = true;
+     message = new StringBuilder();
+   } else if (qName.equals("rulegroup")) {
+     ruleGroupId = attrs.getValue("id");
+     inRuleGroup = true;
+     defaultOff = "off".equals(attrs.getValue(DEFAULT));
+   }
+ }
+
+ /**
+ * Handles the closing tag of an element of the false-friends XML. Closing a
+ * rule may create a {@link PatternRule}; closing an exception, token,
+ * translation, example or rule group stores the data collected since the
+ * corresponding opening tag.
+ */
+ @Override
+ public void endElement(final String namespaceURI, final String sName,
+ final String qName) {
+ if (qName.equals("rule")) {
+ // a rule is only created when the pattern is in the language of the text,
+ // a translation into the mother tongue was seen, and the two languages
+ // differ
+ if (language == textLanguage && translationLanguage != null
+ && translationLanguage == motherTongue && language != motherTongue
+ && !translations.isEmpty()) {
+ formatter.applyPattern(messages.getString("false_friend_hint"));
+ final Object[] messageArguments = {
+ elements.toString().replace('|', '/'),
+ messages.getString(textLanguage.getShortName()),
+ formatTranslations(translations),
+ messages.getString(motherTongue.getShortName()) };
+ final String description = formatter.format(messageArguments);
+ final PatternRule rule = new PatternRule(id, language, elementList,
+ messages.getString("false_friend_desc") + " "
+ + elements.toString().replace('|', '/'), description, messages
+ .getString("false_friend"));
+ rule.setCorrectExamples(correctExamples);
+ rule.setIncorrectExamples(incorrectExamples);
+ rule.setCategory(new Category(messages
+ .getString("category_false_friend")));
+ if (defaultOff) {
+ rule.setDefaultOff();
+ }
+ rules.add(rule);
+ }
+
+ // the collected pattern elements are dropped whether or not a rule was
+ // created
+ if (elementList != null) {
+ elementList.clear();
+ }
+
+ } else if (qName.equals("exception")) {
+ inException = false;
+ // the token element is created on the first exception of a token
+ if (!exceptionSet) {
+ tokenElement = new Element(elements.toString(), caseSensitive,
+ regExpression, tokenInflected);
+ exceptionSet = true;
+ }
+ tokenElement.setNegation(tokenNegated);
+ if (!StringTools.isEmpty(exceptions.toString())) {
+ tokenElement.setStringException(exceptions.toString(),
+ exceptionStringRegExp, exceptionStringInflected,
+ exceptionStringNegation, exceptionValidNext, exceptionValidPrev);
+ }
+ if (exceptionPosToken != null) {
+ tokenElement.setPosException(exceptionPosToken, exceptionPosRegExp,
+ exceptionPosNegation, exceptionValidNext, exceptionValidPrev);
+ exceptionPosToken = null;
+ }
+ } else if (qName.equals(TOKEN)) {
+ finalizeTokens();
+ } else if (qName.equals("pattern")) {
+ inPattern = false;
+ } else if (qName.equals("translation")) {
+ // translations into the mother tongue feed the rule description,
+ // translations into the text language become suggestions
+ if (currentTranslationLanguage == motherTongue) {
+ translations.add(translation);
+ }
+ if (currentTranslationLanguage == textLanguage) {
+ suggestions.add(translation.toString());
+ }
+ translation = new StringBuilder();
+ inTranslation = false;
+ currentTranslationLanguage = null;
+ } else if (qName.equals(EXAMPLE)) {
+ if (inCorrectExample) {
+ correctExamples.add(correctExample.toString());
+ } else if (inIncorrectExample) {
+ incorrectExamples
+ .add(new IncorrectExample(incorrectExample.toString()));
+ }
+ inCorrectExample = false;
+ inIncorrectExample = false;
+ correctExample = new StringBuilder();
+ incorrectExample = new StringBuilder();
+ } else if (qName.equals("message")) {
+ inMessage = false;
+ } else if (qName.equals("rulegroup")) {
+ // NOTE(review): suggestions are only copied into the map when a rule
+ // group ends, under the id of the last rule seen - confirm that rules
+ // never occur outside a rule group.
+ if (!suggestions.isEmpty()) {
+ final List<String> l = new ArrayList<String>(suggestions);
+ suggestionMap.put(id, l);
+ suggestions.clear();
+ }
+ inRuleGroup = false;
+ }
+ }
+
+ /**
+  * Puts every translation in double quotes and joins the results with ", ".
+  *
+  * @param translations
+  *          Translations to format.
+  * @return The joined text; empty for an empty list.
+  */
+ private String formatTranslations(final List<StringBuilder> translations) {
+   final StringBuilder quoted = new StringBuilder();
+   String separator = "";
+   for (final StringBuilder trans : translations) {
+     quoted.append(separator);
+     quoted.append('"').append(trans.toString()).append('"');
+     separator = ", ";
+   }
+   return quoted.toString();
+ }
+
+ /**
+  * Appends character data to the buffer that belongs to the element
+  * currently being read. The states are tested in a fixed order and only the
+  * first active one receives the text; text outside all of them is dropped.
+  */
+ @Override
+ public void characters(final char[] buf, final int offset, final int len) {
+   final String chunk = new String(buf, offset, len);
+   if (inException) {
+     exceptions.append(chunk);
+     return;
+   }
+   if (inToken && inPattern) {
+     elements.append(chunk);
+     return;
+   }
+   if (inCorrectExample) {
+     correctExample.append(chunk);
+     return;
+   }
+   if (inIncorrectExample) {
+     incorrectExample.append(chunk);
+     return;
+   }
+   if (inTranslation) {
+     translation.append(chunk);
+   }
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Match.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Match.java
new file mode 100644
index 0000000..0519f2c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Match.java
@@ -0,0 +1,551 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.TreeSet;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Reference to a matched token in a pattern, can be formatted and used for
+ * matching & suggestions.
+ *
+ * @author Marcin Miłkowski
+ */
+public class Match {
+
+ /** Possible string case conversions. */
+ public enum CaseConversion {
+   NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER;
+
+   /**
+    * Converts a string to the enum constant with exactly that name.
+    *
+    * The constants are compared one by one instead of calling
+    * <code>valueOf</code> and catching every exception, so an unknown or
+    * null name is an ordinary result rather than a swallowed failure.
+    *
+    * @param str
+    *          String value to be converted; may be null.
+    * @return The constant named <code>str</code>, or NONE if there is no such
+    *         constant.
+    */
+   public static CaseConversion toCase(final String str) {
+     for (final CaseConversion conversion : values()) {
+       if (conversion.name().equals(str)) {
+         return conversion;
+       }
+     }
+     return NONE;
+   }
+ }
+
+ /** Possible settings for including skipped tokens. */
+ public enum IncludeRange {
+   NONE, FOLLOWING, ALL;
+
+   /**
+    * Converts a string to the enum constant with exactly that name.
+    *
+    * The constants are compared one by one instead of calling
+    * <code>valueOf</code> and catching every exception, so an unknown or
+    * null name is an ordinary result rather than a swallowed failure.
+    *
+    * @param str
+    *          String value to be converted; may be null.
+    * @return The constant named <code>str</code>, or NONE if there is no such
+    *         constant.
+    */
+   public static IncludeRange toRange(final String str) {
+     for (final IncludeRange range : values()) {
+       if (range.name().equals(str)) {
+         return range;
+       }
+     }
+     return NONE;
+   }
+ }
+
+ private final String posTag;
+ private boolean postagRegexp;
+ private final String regexReplace;
+ private final String posTagReplace;
+ private final CaseConversion caseConversionType;
+
+ private final IncludeRange includeSkipped;
+ private String skippedTokens;
+
+ /**
+ * True if this match element formats a statically defined lemma which is
+ * enclosed by the element, e.g., <tt>&lt;match...&gt;word&lt;/match&gt;</tt>.
+ */
+ private boolean staticLemma;
+
+ /**
+ * True if this match element is used for formatting POS token.
+ */
+ private final boolean setPos;
+
+ private AnalyzedTokenReadings formattedToken;
+ private AnalyzedTokenReadings matchedToken;
+
+ private int tokenRef;
+
+ /** Word form generator for POS tags. **/
+ private Synthesizer synthesizer;
+
+ /** Pattern used to define parts of the matched token. **/
+ private Pattern pRegexMatch;
+
+ /** Pattern used to define parts of the matched POS token. **/
+ private Pattern pPosRegexMatch;
+
+ /**
+ * True when the match is not in the suggestion.
+ */
+ private boolean inMessageOnly;
+
+ /**
+ * Creates a reference to a matched token.
+ *
+ * @param posTag
+ * POS tag of the reference; compiled as a regular expression when
+ * <code>postagRegexp</code> is true. May be null.
+ * @param posTagReplace
+ * Replacement for the POS tag; stored as given.
+ * @param postagRegexp
+ * True if <code>posTag</code> is a regular expression.
+ * @param regexMatch
+ * Regular expression applied to the token text; may be null, in which
+ * case no pattern is compiled.
+ * @param regexReplace
+ * Replacement used together with <code>regexMatch</code>.
+ * @param caseConversionType
+ * Case conversion to apply.
+ * @param setPOS
+ * True if this match is used for setting the POS of an element.
+ * @param includeSkipped
+ * Whether and how skipped tokens are included.
+ */
+ public Match(final String posTag, final String posTagReplace,
+ final boolean postagRegexp, final String regexMatch,
+ final String regexReplace, final CaseConversion caseConversionType,
+ final boolean setPOS,
+ final IncludeRange includeSkipped) {
+ this.posTag = posTag;
+ this.postagRegexp = postagRegexp;
+ this.caseConversionType = caseConversionType;
+
+ if (regexMatch != null) {
+ pRegexMatch = Pattern.compile(regexMatch);
+ }
+ if (postagRegexp && posTag != null) {
+ pPosRegexMatch = Pattern.compile(posTag);
+ }
+
+ this.regexReplace = regexReplace;
+ this.posTagReplace = posTagReplace;
+ this.setPos = setPOS;
+ this.includeSkipped = includeSkipped;
+ }
+
+ /**
+ * Sets the token that will be formatted or otherwise used in the class.
+ *
+ * When a static lemma has been set, the lemma stays the token to be
+ * formatted and the given token is only stored as the matched token;
+ * otherwise the given token itself becomes the token to be formatted.
+ *
+ * @param token
+ * The matched token.
+ */
+ public final void setToken(final AnalyzedTokenReadings token) {
+ if (staticLemma) {
+ matchedToken = token;
+ } else {
+ formattedToken = token;
+ }
+ }
+
+ /**
+ * Sets the token to be formatted etc. and includes the support for
+ * including the skipped tokens.
+ * @param tokens Array of tokens
+ * @param index Index of the token to be formatted
+ * @param next Position of the next token, relative to <code>index</code>
+ * (the skipped tokens are the ones between tokens[index] and
+ * tokens[index + next])
+ */
+ public final void setToken(final AnalyzedTokenReadings[] tokens, final int index, final int next) {
+ setToken(tokens[index]);
+ // skipped tokens are only collected when there is at least one of them and
+ // including them was requested
+ if (next > 1 && includeSkipped != IncludeRange.NONE) {
+ final StringBuilder sb = new StringBuilder();
+ // with FOLLOWING only the skipped tokens are kept, not the token itself
+ if (includeSkipped == IncludeRange.FOLLOWING) {
+ formattedToken = null;
+ }
+ for (int k = index + 1; k < index + next; k++) {
+ // tokens after the first skipped one are separated by a single space
+ // when they were preceded by whitespace
+ if (k > index + 1 &&
+ tokens[k].isWhitespaceBefore()) {
+ sb.append(' ');
+ }
+ sb.append(tokens[k].getToken());
+ }
+ skippedTokens = sb.toString();
+ } else {
+ skippedTokens = "";
+ }
+ }
+
+ /**
+ private String[] addSkipped(final String[] formattedString) {
+ if (skippedTokens != null && !"".equals(skippedTokens)) {
+ String[] finalStrings = new String[formattedString.length];
+ for (int i = 1; i <= formattedString.length; i++)
+ }
+ }
+
+ **/
+
+ /**
+ * Checks if the Match element is used for setting the part of speech Element.
+ *
+ * @return True if Match sets POS.
+ */
+ public final boolean setsPos() {
+ return setPos;
+ }
+
+ /**
+ * Checks if the Match element uses regexp-based form of the POS tag.
+ *
+ * @return True if regexp is used in POS.
+ */
+ public final boolean posRegExp() {
+ return postagRegexp;
+ }
+
+ /**
+  * Sets a base form (lemma) that will be formatted, or synthesized, using the
+  * specified POS regular expressions. An empty lemma leaves the match
+  * unchanged.
+  *
+  * @param lemmaString String that specifies the base form.
+  */
+ public final void setLemmaString(final String lemmaString) {
+   if (StringTools.isEmpty(lemmaString)) {
+     return;
+   }
+   // The lemma serves both as the token text and as its base form.
+   final AnalyzedToken lemmaToken = new AnalyzedToken(lemmaString, posTag,
+       lemmaString);
+   formattedToken = new AnalyzedTokenReadings(lemmaToken, 0);
+   staticLemma = true;
+   // With a static lemma the POS tag is always treated as a regular
+   // expression.
+   postagRegexp = true;
+   if (posTag != null) {
+     pPosRegexMatch = Pattern.compile(posTag);
+   }
+ }
+
+ /**
+ * Sets the synthesizer used for grammatical synthesis of forms based on
+ * formatted POS values. The reference is stored as given; null is accepted
+ * here.
+ *
+ * @param synth Synthesizer instance to use.
+ */
+ public final void setSynthesizer(final Synthesizer synth) {
+ synthesizer = synth;
+ }
+
+ /**
+ * Gets all strings formatted using the match element.
+ * <p>
+ * Starts from the text of the formatted token, applies the optional regular
+ * expression replacement and the case conversion, and, when a POS tag is set
+ * and a synthesizer is available, replaces the result with the synthesized
+ * word forms. When skipped tokens are to be included, they are appended to
+ * every resulting string.
+ *
+ * @return array of strings
+ * @throws IOException
+ * in case of synthesizer-related disk problems.
+ */
+ public final String[] toFinalString() throws IOException {
+ String[] formattedString = new String[1];
+ if (formattedToken != null) {
+ final int readingCount = formattedToken.getReadingsLength();
+ formattedString[0] = formattedToken.getToken();
+ if (pRegexMatch != null) {
+ formattedString[0] = pRegexMatch.matcher(formattedString[0])
+ .replaceAll(regexReplace);
+ }
+ formattedString[0] = convertCase(formattedString[0]);
+ if (posTag != null) {
+ if (synthesizer == null) {
+ // No synthesizer: fall back to the raw token text. Note that this
+ // overwrites the regexp replacement and case conversion done above.
+ formattedString[0] = formattedToken.getToken();
+ } else if (postagRegexp) {
+ // TreeSet keeps the collected forms sorted and free of duplicates.
+ final TreeSet<String> wordForms = new TreeSet<String>();
+ // oneForm ends up true when the last lemma-less reading inspected has
+ // no POS tag or only a sentence/paragraph marker tag; synthesis is
+ // skipped in that case.
+ boolean oneForm = false;
+ for (int k = 0; k < readingCount; k++) {
+ if (formattedToken.getAnalyzedToken(k).getLemma() == null) {
+ final String posUnique = formattedToken.getAnalyzedToken(k)
+ .getPOSTag();
+ if (posUnique == null) {
+ wordForms.add(formattedToken.getToken());
+ oneForm = true;
+ } else {
+ if (JLanguageTool.SENTENCE_START_TAGNAME.equals(posUnique)
+ || JLanguageTool.SENTENCE_END_TAGNAME.equals(posUnique)
+ || JLanguageTool.PARAGRAPH_END_TAGNAME.equals(posUnique)) {
+ if (!oneForm) {
+ wordForms.add(formattedToken.getToken());
+ }
+ oneForm = true;
+ } else {
+ oneForm = false;
+ }
+ }
+ }
+ }
+ final String targetPosTag = getTargetPosTag();
+ if (!oneForm) {
+ for (int i = 0; i < readingCount; i++) {
+ final String[] possibleWordForms = synthesizer.synthesize(
+ formattedToken.getAnalyzedToken(i), targetPosTag, true);
+ if (possibleWordForms != null) {
+ wordForms.addAll(Arrays.asList(possibleWordForms));
+ }
+ }
+ }
+ if (wordForms.isEmpty()) {
+ // Nothing could be produced: return the token in parentheses.
+ formattedString[0] = "(" + formattedToken.getToken() + ")";
+ } else {
+ formattedString = wordForms.toArray(new String[wordForms.size()]);
+ }
+ } else {
+ // Plain (non-regexp) POS tag: synthesize each reading with posTag.
+ final TreeSet<String> wordForms = new TreeSet<String>();
+ for (int i = 0; i < readingCount; i++) {
+ final String[] possibleWordForms = synthesizer.synthesize(
+ formattedToken.getAnalyzedToken(i), posTag);
+ if (possibleWordForms != null) {
+ wordForms.addAll(Arrays.asList(possibleWordForms));
+ }
+ }
+ // NOTE(review): unlike the regexp branch there is no fallback here, so
+ // the result is an empty array when nothing was synthesized.
+ formattedString = wordForms.toArray(new String[wordForms.size()]);
+ }
+ }
+ }
+ if (includeSkipped != IncludeRange.NONE
+ && skippedTokens != null && !"".equals(skippedTokens)) {
+ // Append the skipped tokens to every form; a null form (possible when
+ // formattedToken is null) is treated as the empty string.
+ final String[] helper = new String[formattedString.length];
+ for (int i = 0; i < formattedString.length; i++) {
+ if (formattedString[i] == null) {
+ formattedString[i] = "";
+ }
+ helper[i] = formattedString[i] + skippedTokens;
+ }
+ formattedString = helper;
+ }
+ return formattedString;
+ }
+
+ /**
+  * Format POS tag using parameters already defined in the class.
+  * <p>
+  * With a static lemma, the readings of the matched token are scanned and the
+  * last POS tag matching the POS regular expression is used, optionally
+  * rewritten with the POS replacement string. Otherwise the readings of the
+  * formatted token are scanned; when a replacement string is set, every
+  * matching tag is rewritten (and corrected by the synthesizer when this match
+  * sets the POS) and the results are joined with "|".
+  * <p>
+  * A missing POS regular expression or a null POS tag is tolerated: the scan
+  * then matches nothing and the plain POS tag (possibly null) is returned.
+  *
+  * @return Formatted POS tag as String.
+  */
+ // FIXME: gets only the first POS tag that matches, this can be wrong
+ // on the other hand, many POS tags = too many suggestions?
+ public final String getTargetPosTag() {
+   String targetPosTag = posTag;
+   if (staticLemma) {
+     final int numRead = matchedToken.getReadingsLength();
+     for (int i = 0; i < numRead; i++) {
+       final String tst = matchedToken.getAnalyzedToken(i).getPOSTag();
+       // pPosRegexMatch is only compiled when a POS tag exists, so guard it here too.
+       if (tst != null && pPosRegexMatch != null
+           && pPosRegexMatch.matcher(tst).matches()) {
+         targetPosTag = tst;
+       }
+     }
+     if (pPosRegexMatch != null && posTagReplace != null && targetPosTag != null) {
+       targetPosTag = pPosRegexMatch.matcher(targetPosTag).replaceAll(
+           posTagReplace);
+     }
+   } else {
+     final List<String> posTags = new ArrayList<String>();
+     final int numRead = formattedToken.getReadingsLength();
+     for (int i = 0; i < numRead; i++) {
+       final String tst = formattedToken.getAnalyzedToken(i).getPOSTag();
+       if (tst != null && pPosRegexMatch != null
+           && pPosRegexMatch.matcher(tst).matches()) {
+         targetPosTag = tst;
+         posTags.add(tst);
+       }
+     }
+     if (pPosRegexMatch != null && posTagReplace != null) {
+       // No reading matched: fall back to the plain POS tag, if there is one.
+       if (posTags.isEmpty() && targetPosTag != null) {
+         posTags.add(targetPosTag);
+       }
+       if (!posTags.isEmpty()) {
+         final StringBuilder sb = new StringBuilder();
+         final int posTagLen = posTags.size();
+         int l = 0;
+         for (String lposTag : posTags) {
+           l++;
+           lposTag = pPosRegexMatch.matcher(lposTag).replaceAll(posTagReplace);
+           if (setPos) {
+             lposTag = synthesizer.getPosTagCorrection(lposTag);
+           }
+           sb.append(lposTag);
+           if (l < posTagLen) {
+             sb.append('|');
+           }
+         }
+         targetPosTag = sb.toString();
+       }
+     }
+   }
+   return targetPosTag;
+ }
+
+ /**
+  * Returns the formatted match as one string. All strings produced by
+  * {@link #toFinalString()} are concatenated in order, separated by the
+  * regular expression alternation operator "|".
+  *
+  * @return Formatted string of the matched token.
+  * @throws IOException when {@link #toFinalString()} fails.
+  */
+ public final String toTokenString() throws IOException {
+   final String[] alternatives = toFinalString();
+   final StringBuilder joined = new StringBuilder();
+   String separator = "";
+   for (final String alternative : alternatives) {
+     joined.append(separator).append(alternative);
+     separator = "|";
+   }
+   return joined.toString();
+ }
+
+ /**
+ * Sets the token number referenced by the match. The value is stored as
+ * given, without validation.
+ *
+ * @param i Token number.
+ */
+ public final void setTokenRef(final int i) {
+ tokenRef = i;
+ }
+
+ /**
+ * Gets the token number referenced by the match, as last stored by
+ * {@link #setTokenRef(int)}.
+ *
+ * @return int - token number.
+ */
+ public final int getTokenRef() {
+ return tokenRef;
+ }
+
+ /**
+  * Applies the case conversion configured for this match to a token. Input
+  * that {@code StringTools.isEmpty} reports as empty is returned unchanged.
+  * The "start" conversions touch only the first character; the "all"
+  * conversions change the whole token.
+  *
+  * @param s Token to be converted.
+  * @return Converted string.
+  */
+ private String convertCase(final String s) {
+   if (StringTools.isEmpty(s)) {
+     return s;
+   }
+   switch (caseConversionType) {
+     case STARTLOWER:
+       return s.substring(0, 1).toLowerCase() + s.substring(1);
+     case STARTUPPER:
+       return s.substring(0, 1).toUpperCase() + s.substring(1);
+     case ALLUPPER:
+       return s.toUpperCase();
+     case ALLLOWER:
+       return s.toLowerCase();
+     case NONE:
+     default:
+       return s;
+   }
+ }
+
+ /**
+ * Used to let LT know that it should change the case of the match.
+ *
+ * @return true if the configured case conversion is anything other than
+ * CaseConversion.NONE.
+ */
+ public final boolean convertsCase() {
+ return !caseConversionType.equals(CaseConversion.NONE);
+ }
+
+ /**
+ * Builds a new set of readings for the formatted token according to the
+ * match settings (static lemma, regexp replacement, case conversion and POS
+ * tag).
+ *
+ * @return the newly built readings, or the formatted token itself (which
+ * may be null) when no reading was produced.
+ */
+ public final AnalyzedTokenReadings filterReadings() {
+ final ArrayList<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
+ if (formattedToken != null) {
+ if (staticLemma) {
+ // Rebuild the formatted token: surface form, position and whitespace
+ // flag come from the matched token, the lemma is the static lemma.
+ formattedToken = new AnalyzedTokenReadings(new AnalyzedToken(
+ matchedToken.getToken(), posTag, formattedToken.getToken()),
+ matchedToken.getStartPos());
+ formattedToken.setWhitespaceBefore(matchedToken.isWhitespaceBefore());
+ }
+ String token = formattedToken.getToken();
+ if (pRegexMatch != null) {
+ token = pRegexMatch.matcher(token).replaceAll(regexReplace);
+ }
+ token = convertCase(token);
+ if (posTag != null) {
+ final int numRead = formattedToken.getReadingsLength();
+ if (postagRegexp) {
+ // Keep every reading whose POS tag matches the POS regexp, optionally
+ // rewriting the tag with posTagReplace.
+ String targetPosTag = posTag;
+ for (int i = 0; i < numRead; i++) {
+ final String tst = formattedToken.getAnalyzedToken(i).getPOSTag();
+ if (tst != null && pPosRegexMatch.matcher(tst).matches()) {
+ targetPosTag = formattedToken.getAnalyzedToken(i).getPOSTag();
+ if (posTagReplace != null) {
+ targetPosTag = pPosRegexMatch.matcher(targetPosTag).replaceAll(
+ posTagReplace);
+ }
+ l
+ .add(new AnalyzedToken(token, targetPosTag, formattedToken
+ .getAnalyzedToken(i).getLemma()));
+ l.get(l.size() - 1).setWhitespaceBefore(formattedToken.isWhitespaceBefore());
+ }
+ }
+ // No reading matched the regexp: fall back to readings tagged with posTag.
+ if (l.isEmpty()) {
+ for (final AnalyzedToken anaTok : getNewToken(numRead, token)) {
+ l.add(anaTok);
+ }
+ }
+ } else {
+ for (final AnalyzedToken anaTok : getNewToken(numRead, token)) {
+ l.add(anaTok);
+ }
+ }
+ // Preserve sentence-end and paragraph-end markers as extra readings.
+ if (formattedToken.isSentEnd()) {
+ l.add(new AnalyzedToken(formattedToken.getToken(),
+ JLanguageTool.SENTENCE_END_TAGNAME,
+ formattedToken.getAnalyzedToken(0).getLemma()));
+ }
+ if (formattedToken.isParaEnd()) {
+ l.add(new AnalyzedToken(formattedToken.getToken(),
+ JLanguageTool.PARAGRAPH_END_TAGNAME,
+ formattedToken.getAnalyzedToken(0).getLemma()));
+ }
+ }
+ }
+ // l is only non-empty when formattedToken was non-null above.
+ if (l.isEmpty()) {
+ return formattedToken;
+ }
+ return new AnalyzedTokenReadings(l.toArray(new AnalyzedToken[l.size()]), formattedToken.getStartPos());
+ }
+
+ /**
+  * Creates one new reading, tagged with the match's POS tag, for every reading
+  * of the formatted token that carries a POS tag. The lemma is taken from a
+  * reading whose tag equals the POS tag when that reading has a lemma, and
+  * otherwise falls back to the lemma of the first reading; once set, it is
+  * carried over to the following readings.
+  *
+  * @param numRead number of readings of the formatted token to inspect.
+  * @param token surface form used for the new readings.
+  * @return the new readings, possibly empty.
+  */
+ private AnalyzedToken[] getNewToken(final int numRead, final String token) {
+   final List<AnalyzedToken> created = new ArrayList<AnalyzedToken>();
+   String lemma = "";
+   for (int j = 0; j < numRead; j++) {
+     final AnalyzedToken reading = formattedToken.getAnalyzedToken(j);
+     final String readingPos = reading.getPOSTag();
+     if (readingPos == null) {
+       continue;
+     }
+     if (readingPos.equals(posTag) && reading.getLemma() != null) {
+       lemma = reading.getLemma();
+     }
+     if (StringTools.isEmpty(lemma)) {
+       lemma = formattedToken.getAnalyzedToken(0).getLemma();
+     }
+     final AnalyzedToken replacement = new AnalyzedToken(token, posTag, lemma);
+     replacement.setWhitespaceBefore(formattedToken.isWhitespaceBefore());
+     created.add(replacement);
+   }
+   return created.toArray(new AnalyzedToken[created.size()]);
+ }
+
+ /**
+ * Sets the inMessageOnly flag of this match.
+ *
+ * @param inMessageOnly
+ * the inMessageOnly to set
+ */
+ public void setInMessageOnly(final boolean inMessageOnly) {
+ this.inMessageOnly = inMessageOnly;
+ }
+
+ /**
+ * Gets the inMessageOnly flag of this match.
+ *
+ * @return the inMessageOnly
+ */
+ public boolean isInMessageOnly() {
+ return inMessageOnly;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRule.java
new file mode 100644
index 0000000..843ef98
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRule.java
@@ -0,0 +1,652 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A Rule that describes a language error as a simple pattern of words or of
+ * part-of-speech tags.
+ *
+ * @author Daniel Naber
+ */
+public class PatternRule extends AbstractPatternRule {
+
+ private static final String SUGG_TAG = "<suggestion>";
+ private static final String END_SUGG_TAG = "</suggestion>";
+
+ private String subId; // because there can be more than one rule in a rule
+ // group
+
+ private String message;
+ private String shortMessage;
+
+ /** Formatted suggestion elements. **/
+ private List<Match> suggestionMatches;
+
+ /**
+ * A list of elements as they appear in XML file (phrases count as single
+ * tokens in case of matches or skipping).
+ */
+ private List<Integer> elementNo;
+
+ /**
+ * This property is used for short-circuiting evaluation of the elementNo list
+ * order.
+ */
+ private boolean useList;
+
+ /**
+ * Marks whether the rule is a member of a disjunctive set (in case of OR
+ * operation on phraserefs).
+ **/
+ private boolean isMemberOfDisjunctiveSet;
+
+ /**
+ * Creates a pattern rule and pre-computes, in elementNo, how many pattern
+ * elements each XML-level token accounts for (elements belonging to the same
+ * phrase are counted together).
+ *
+ * @param id
+ * Id of the Rule
+ * @param language
+ * Language of the Rule
+ * @param elements
+ * Element (token) list
+ * @param description
+ * Description to be shown (name)
+ * @param message
+ * Message to be displayed to the user
+ * @param shortMessage
+ * Short message; stored and passed on to created rule matches
+ * @throws NullPointerException
+ * if id, language, elements or description is null (checked only
+ * after the superclass constructor has run)
+ */
+
+ public PatternRule(final String id, final Language language,
+ final List<Element> elements, final String description,
+ final String message, final String shortMessage) {
+ super(id, description, language, elements, false);
+ if (id == null) {
+ throw new NullPointerException("id cannot be null");
+ }
+ if (language == null) {
+ throw new NullPointerException("language cannot be null");
+ }
+ if (elements == null) {
+ throw new NullPointerException("elements cannot be null");
+ }
+ if (description == null) {
+ throw new NullPointerException("description cannot be null");
+ }
+
+ this.message = message;
+ this.shortMessage = shortMessage;
+ this.elementNo = new ArrayList<Integer>();
+ // prevName/curName track the phrase of the previous/current element,
+ // cnt counts elements of the phrase being scanned, loopCnt counts all
+ // elements seen so far.
+ String prevName = "";
+ String curName = "";
+ int cnt = 0;
+ int loopCnt = 0;
+ for (final Element e : patternElements) {
+ if (e.isPartOfPhrase()) {
+ curName = e.getPhraseName();
+ if (prevName.equals(curName) || StringTools.isEmpty(prevName)) {
+ cnt++;
+ useList = true;
+ } else {
+ // A different phrase starts: flush the count of the previous one.
+ // NOTE(review): cnt is reset to 0 and the names are cleared here, so
+ // this first element of the new phrase is not counted - confirm
+ // that this is intended.
+ elementNo.add(cnt);
+ prevName = "";
+ curName = "";
+ cnt = 0;
+ }
+ prevName = curName;
+ loopCnt++;
+ // Last element of the pattern: flush the phrase still being counted.
+ if (loopCnt == patternElements.size() && !StringTools.isEmpty(prevName)) {
+ elementNo.add(cnt);
+ }
+ } else {
+ // NOTE(review): cnt is not reset after being flushed here, so it is
+ // appended again for every later non-phrase element - confirm.
+ if (cnt > 0) {
+ elementNo.add(cnt);
+ }
+ elementNo.add(1);
+ loopCnt++;
+ }
+ }
+ }
+
+ /**
+ * Creates a pattern rule exactly like the six-argument constructor and
+ * additionally records whether the rule is a member of a disjunctive set.
+ *
+ * @param isMember
+ * true if the rule is a member of a disjunctive set
+ */
+ public PatternRule(final String id, final Language language,
+ final List<Element> elements, final String description,
+ final String message, final String shortMessage, final boolean isMember) {
+ this(id, language, elements, description, message, shortMessage);
+ this.isMemberOfDisjunctiveSet = isMember;
+ }
+
+ /**
+ * Gets the sub-id of this rule; it exists because there can be more than one
+ * rule in a rule group.
+ *
+ * @return the sub-id, or null if none was set.
+ */
+ public final String getSubId() {
+ return subId;
+ }
+
+ /**
+ * Sets the sub-id of this rule; it exists because there can be more than one
+ * rule in a rule group.
+ *
+ * @param subId the sub-id to store.
+ */
+ public final void setSubId(final String subId) {
+ this.subId = subId;
+ }
+
+ /**
+ * Gets the raw message of this rule, as passed to the constructor or to
+ * setMessage (back references are not resolved).
+ *
+ * @return the message.
+ */
+ public final String getMessage() {
+ return message;
+ }
+
+ /**
+ * Used for testing rules: only one of the set can match.
+ *
+ * @return Whether the rule may legitimately fail to match because it is a
+ * member of a disjunctive set of rules (generated by phraseref in
+ * the includephrases element).
+ */
+ public final boolean isWithComplexPhrase() {
+ return isMemberOfDisjunctiveSet;
+ }
+
+ /**
+ * Reset complex status - used for testing. Clears the flag that marks this
+ * rule as a member of a disjunctive set.
+ **/
+ public final void notComplexPhrase() {
+ isMemberOfDisjunctiveSet = false;
+ }
+
+ /**
+  * Renders the pattern as text: the {@code toString()} form of every pattern
+  * element, combined by {@code StringTools.listToString} with ", " as the
+  * delimiter.
+  *
+  * @return the pattern as a string.
+  * @since 0.9.2
+  */
+ public final String toPatternString() {
+   final List<String> rendered = new ArrayList<String>();
+   for (final Element element : patternElements) {
+     final String text = element.toString();
+     rendered.add(text);
+   }
+   return StringTools.listToString(rendered, ", ");
+ }
+
+ /**
+  * Return the pattern as an XML string. FIXME: this is not complete, information might be lost!
+  * <p>
+  * Writes the rule id and name, the pattern with its mark_from/mark_to
+  * corrections, one token element per pattern element, the message and the
+  * incorrect and correct examples. All values taken from the rule are passed
+  * through {@code StringTools.escapeXML}, including the POS tag, which may be
+  * a regular expression containing markup characters.
+  *
+  * @return the rule as an XML fragment.
+  * @since 0.9.3
+  */
+ public final String toXML() {
+   final StringBuilder sb = new StringBuilder();
+   sb.append("<rule id=\"");
+   sb.append(StringTools.escapeXML(getId()));
+   sb.append("\" name=\"");
+   sb.append(StringTools.escapeXML(getDescription()));
+   sb.append("\">\n");
+   sb.append("<pattern mark_from=\"");
+   sb.append(startPositionCorrection);
+   sb.append("\" mark_to=\"");
+   sb.append(endPositionCorrection);
+   sb.append('"');
+   // for now, case sensitivity is per pattern, not per element,
+   // so just use the setting of the first element:
+   if (!patternElements.isEmpty() && patternElements.get(0).getCaseSensitive()) {
+     sb.append(" case_sensitive=\"yes\"");
+   }
+   sb.append(">\n");
+   for (Element patternElement : patternElements) {
+     sb.append("<token");
+     if (patternElement.getNegation()) {
+       sb.append(" negate=\"yes\"");
+     }
+     if (patternElement.isRegularExpression()) {
+       sb.append(" regexp=\"yes\"");
+     }
+     if (patternElement.getPOStag() != null) {
+       sb.append(" postag=\"");
+       // Escaped like every other value written by this method.
+       sb.append(StringTools.escapeXML(patternElement.getPOStag()));
+       sb.append('"');
+     }
+     if (patternElement.getPOSNegation()) {
+       sb.append(" negate_pos=\"yes\"");
+     }
+     if (patternElement.isInflected()) {
+       sb.append(" inflected=\"yes\"");
+     }
+     sb.append('>');
+     if (patternElement.getString() != null) {
+       sb.append(StringTools.escapeXML(patternElement.getString()));
+     } else {
+       // TODO
+     }
+     sb.append("</token>\n");
+   }
+   sb.append("</pattern>\n");
+   sb.append("<message>");
+   sb.append(StringTools.escapeXML(message));
+   sb.append("</message>\n");
+   if (getIncorrectExamples() != null) {
+     for (IncorrectExample example : getIncorrectExamples()) {
+       sb.append("<example type=\"incorrect\">");
+       sb.append(StringTools.escapeXML(example.getExample()));
+       sb.append("</example>\n");
+     }
+   }
+   if (getCorrectExamples() != null) {
+     for (String example : getCorrectExamples()) {
+       sb.append("<example type=\"correct\">");
+       sb.append(StringTools.escapeXML(example));
+       sb.append("</example>\n");
+     }
+   }
+   sb.append("</rule>");
+   return sb.toString();
+ }
+
+ /**
+ * Replaces the message of this rule.
+ *
+ * @param message the new message (may contain suggestion markup and back
+ * references, which are resolved when a rule match is created).
+ */
+ public final void setMessage(final String message) {
+ this.message = message;
+ }
+
+ /**
+ * Tries to match the pattern at every possible start token of the sentence
+ * (whitespace tokens excluded) and collects a rule match for each position
+ * where all pattern elements match.
+ *
+ * @param text the analyzed sentence to check.
+ * @return the rule matches found, possibly an empty array.
+ * @throws IOException if building a rule match fails.
+ */
+ @Override
+ public final RuleMatch[] match(final AnalyzedSentence text)
+ throws IOException {
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+ // tokenPositions[n] = distance in tokens (skipped tokens + 1) between the
+ // n-th matched element and the position where it was expected.
+ final int[] tokenPositions = new int[tokens.length + 1];
+ final int patternSize = patternElements.size();
+ // Number of start positions at which the whole pattern still fits.
+ final int limit = Math.max(0, tokens.length - patternSize + 1);
+ Element elem = null;
+ int i = 0;
+ // When sentStart is set, only start position 0 is tried.
+ while (i < limit && !(sentStart && i > 0)) {
+ boolean allElementsMatch = false;
+ int firstMatchToken = -1;
+ int lastMatchToken = -1;
+ int matchingTokens = 0;
+ int prevSkipNext = 0;
+ // this variable keeps the total number
+ // of tokens skipped
+ int skipShiftTotal = 0;
+ if (testUnification) {
+ unifier.reset();
+ }
+ for (int k = 0; k < patternSize; k++) {
+ final Element prevElement = elem;
+ elem = patternElements.get(k);
+ setupRef(firstMatchToken, elem, tokens);
+ final int nextPos = i + k + skipShiftTotal;
+ prevMatched = false;
+ // A skip that runs past the sentence end, or a negative skip, is
+ // clamped so that the search window ends at the last token.
+ if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { // SENT_END?
+ prevSkipNext = tokens.length - (nextPos + 1);
+ }
+ // Leave enough tokens for the pattern elements still to be matched.
+ final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length - (patternSize - k));
+ for (int m = nextPos; m <= maxTok; m++) {
+ allElementsMatch = testAllReadings(tokens, elem, prevElement, m,
+ firstMatchToken, prevSkipNext);
+ if (allElementsMatch) {
+ // The first token in the window that matches is taken.
+ lastMatchToken = m;
+ final int skipShift = lastMatchToken - nextPos;
+ tokenPositions[matchingTokens] = skipShift + 1;
+ prevSkipNext = translateElementNo(elem.getSkipNext());
+ matchingTokens++;
+ skipShiftTotal += skipShift;
+ if (firstMatchToken == -1) {
+ firstMatchToken = lastMatchToken;
+ }
+ break;
+ }
+ }
+ if (!allElementsMatch) {
+ break;
+ }
+ }
+
+ if (allElementsMatch && matchingTokens == patternSize) {
+ final RuleMatch rM = createRuleMatch(tokenPositions, tokens,
+ firstMatchToken, lastMatchToken, matchingTokens);
+ if (rM != null) {
+ ruleMatches.add(rM);
+ }
+ }
+ i++;
+ }
+ return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]);
+ }
+
+ /**
+ * Builds the rule match for a successfully matched pattern: formats the
+ * message, applies the start/end position corrections and computes the
+ * character range of the match.
+ *
+ * @param tokenPositions relative positions of the matched tokens.
+ * @param tokens tokens of the sentence, without whitespace tokens.
+ * @param firstMatchToken index of the first matched token.
+ * @param lastMatchToken index of the last matched token.
+ * @param matchingTokens number of matched pattern elements.
+ * @return the rule match, or null when the computed range is empty.
+ * @throws IOException if formatting the message fails.
+ */
+ private RuleMatch createRuleMatch(final int[] tokenPositions,
+ final AnalyzedTokenReadings[] tokens, final int firstMatchToken,
+ final int lastMatchToken, final int matchingTokens) throws IOException {
+ final String errMessage = formatMatches(tokens, tokenPositions,
+ firstMatchToken, message);
+ // Token offset from firstMatchToken at which the marked range starts.
+ int correctedStPos = 0;
+ if (startPositionCorrection > 0) {
+ for (int l = 0; l <= startPositionCorrection; l++) {
+ correctedStPos += tokenPositions[l];
+ }
+ correctedStPos--;
+ }
+ // Token offset (zero or negative) from lastMatchToken at which it ends.
+ int correctedEndPos = 0;
+ if (endPositionCorrection < 0) {
+ int l = 0;
+ while (l > endPositionCorrection) {
+ correctedEndPos -= tokenPositions[matchingTokens + l - 1];
+ l--;
+ }
+ }
+ AnalyzedTokenReadings firstMatchTokenObj = tokens[firstMatchToken
+ + correctedStPos];
+ boolean startsWithUppercase = StringTools
+ .startsWithUppercase(firstMatchTokenObj.getToken())
+ && !matchConvertsCase();
+
+ if (firstMatchTokenObj.isSentStart()
+ && tokens.length > firstMatchToken + correctedStPos + 1) {
+ // make uppercasing work also at sentence start:
+ firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1];
+ startsWithUppercase = StringTools.startsWithUppercase(firstMatchTokenObj
+ .getToken());
+ }
+ int fromPos = tokens[firstMatchToken + correctedStPos].getStartPos();
+ // FIXME: this is fishy, assumes that comma should always come before
+ // whitespace
+ // When a suggestion starts with a comma, the range starts right after the
+ // end of the preceding token instead.
+ if (errMessage.contains(SUGG_TAG + ",")
+ && firstMatchToken + correctedStPos >= 1) {
+ fromPos = tokens[firstMatchToken + correctedStPos - 1].getStartPos()
+ + tokens[firstMatchToken + correctedStPos - 1].getToken().length();
+ }
+
+ final int toPos = tokens[lastMatchToken + correctedEndPos].getStartPos()
+ + tokens[lastMatchToken + correctedEndPos].getToken().length();
+ if (fromPos < toPos) { // this can happen with some skip="-1" when the last
+ // token is not matched
+ return new RuleMatch(this, fromPos, toPos,
+ errMessage, shortMessage, startsWithUppercase);
+ } // failed to create any rule match...
+ return null;
+ }
+
+ /**
+  * Checks if the suggestion starts with a match that is supposed to convert
+  * case. If it does, stop the default conversion to uppercase.
+  * <p>
+  * The first suggestion in the message must begin with a back reference
+  * (a backslash directly after the opening suggestion tag). A message without
+  * a suggestion tag, or with nothing after the tag, never qualifies.
+  *
+  * @return true, if the match converts the case of the token.
+  */
+ private boolean matchConvertsCase() {
+   if (suggestionMatches == null || suggestionMatches.isEmpty()) {
+     return false;
+   }
+   final int tagPos = message.indexOf(SUGG_TAG);
+   if (tagPos < 0) {
+     // indexOf() yields -1 when there is no suggestion at all.
+     return false;
+   }
+   final int sugStart = tagPos + SUGG_TAG.length();
+   if (sugStart >= message.length() || message.charAt(sugStart) != '\\') {
+     return false;
+   }
+   for (final Match sMatch : suggestionMatches) {
+     // The list may hold null entries (concatMatches checks for them as well).
+     if (sMatch != null && !sMatch.isInMessageOnly() && sMatch.convertsCase()) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+ * Appends a match element used for formatting the message; the list of
+ * suggestion matches is created on first use.
+ *
+ * @param m the match to add.
+ */
+ public final void addSuggestionMatch(final Match m) {
+ if (suggestionMatches == null) {
+ suggestionMatches = new ArrayList<Match>();
+ }
+ suggestionMatches.add(m);
+ }
+
+ /**
+  * Gets the index of the element indexed by i, adding any offsets because of
+  * the phrases in the rule.
+  * <p>
+  * The value is returned unchanged when the rule contains no phrases or when
+  * i is negative. Otherwise the lengths of the first i entries of the element
+  * list are summed; positions beyond the end of that list count as a single
+  * token each (as in phraseLen) instead of raising an
+  * IndexOutOfBoundsException.
+  *
+  * @param i
+  *          Current element index.
+  * @return int Index translated into XML element no.
+  */
+ private int translateElementNo(final int i) {
+   if (!useList || i < 0) {
+     return i;
+   }
+   final int known = elementNo.size();
+   int j = 0;
+   for (int k = 0; k < i; k++) {
+     j += k < known ? elementNo.get(k) : 1;
+   }
+   return j;
+ }
+
+ /**
+  * Returns the number of pattern elements covered by the token at the given
+  * index: the stored phrase length when the rule contains phrases and the
+  * index does not lie beyond the element list, and 1 otherwise.
+  *
+  * @param i
+  *          The index of the token.
+  * @return the length recorded for the index, or 1.
+  **/
+ private int phraseLen(final int i) {
+   if (useList && i <= (elementNo.size() - 1)) {
+     return elementNo.get(i);
+   }
+   return 1;
+ }
+
+ /**
+  * Builds the Cartesian product of the string arrays in {@code input}. The
+  * method recurses over the rows: row {@code r} contributes each of its
+  * entries in turn to slot {@code r} of the work array, and once every slot is
+  * filled the slots are joined into one string, with
+  * {@code StringTools.addSpace} deciding what separates neighbouring entries.
+  *
+  * @param input
+  *          Array of string arrays to combine.
+  * @param output
+  *          Work array of strings; it is overwritten during the recursion.
+  * @param r
+  *          Starting row (use 0 to get all combinations).
+  * @param lang
+  *          Text language, passed on to {@code StringTools.addSpace}.
+  * @return the combined strings.
+  */
+ private static String[] combineLists(final String[][] input,
+     final String[] output, final int r, final Language lang) {
+   if (r == input.length) {
+     // Every slot is filled: join the current combination.
+     final int last = output.length - 1;
+     final StringBuilder joined = new StringBuilder();
+     for (int k = 0; k <= last; k++) {
+       joined.append(output[k]);
+       if (k != last) {
+         joined.append(StringTools.addSpace(output[k + 1], lang));
+       }
+     }
+     return new String[] {joined.toString()};
+   }
+   final List<String> combined = new ArrayList<String>();
+   for (final String candidate : input[r]) {
+     output[r] = candidate;
+     combined.addAll(Arrays.asList(combineLists(input, output, r + 1, lang)));
+   }
+   return combined.toArray(new String[combined.size()]);
+ }
+
+ /**
+  * Formats the tokens referenced by one suggestion match. A single token is
+  * formatted directly; for a phrase spanning several tokens every token is
+  * formatted separately and the alternatives are combined with
+  * {@code combineLists}.
+  *
+  * @param start
+  *          Position of the match element in the list of suggestion matches.
+  * @param index
+  *          The index of the element found in the matching sentence.
+  * @param tokenIndex
+  *          The position of the token in the AnalyzedTokenReadings array
+  *          (the token at {@code tokenIndex - 1} is the first one used).
+  * @param tokens
+  *          Array of AnalyzedTokenReadings.
+  * @param nextTokenPos
+  *          Position used to compute how many tokens were skipped after the
+  *          referenced token.
+  * @return the formatted strings, or null when the list holds no match at
+  *         {@code start}.
+  * @throws IOException
+  *           in case disk operations (used in synthesizer) go wrong.
+  */
+ private String[] concatMatches(final int start, final int index,
+     final int tokenIndex, final AnalyzedTokenReadings[] tokens,
+     final int nextTokenPos)
+     throws IOException {
+   final Match match = suggestionMatches.get(start);
+   if (match == null) {
+     return null;
+   }
+   final int len = phraseLen(index);
+   if (len == 1) {
+     match.setToken(tokens, tokenIndex - 1, nextTokenPos - tokenIndex);
+     match.setSynthesizer(language.getSynthesizer());
+     return match.toFinalString();
+   }
+   final List<String[]> perToken = new ArrayList<String[]>();
+   for (int offset = 0; offset < len; offset++) {
+     final int skipped = nextTokenPos - (tokenIndex + offset);
+     match.setToken(tokens, tokenIndex - 1 + offset, skipped);
+     match.setSynthesizer(language.getSynthesizer());
+     perToken.add(match.toFinalString());
+   }
+   final int count = perToken.size();
+   return combineLists(perToken.toArray(new String[count][]),
+       new String[count], 0, language);
+ }
+
+ /**
+  * Replace back references generated with &lt;match&gt; and \\1 in message
+  * using Match class, and take care of skipping.
+  * <p>
+  * The message is scanned repeatedly for its first backslash; as long as that
+  * backslash is followed by a digit, the reference is replaced either through
+  * the corresponding suggestion match or, when no match elements are in use,
+  * by the text of the referenced token. A reference at the very start of the
+  * message is handled like any other.
+  *
+  * @param tokenReadings
+  *          Array of AnalyzedTokenReadings that were matched against the
+  *          pattern
+  * @param positions
+  *          Array of relative positions of matched tokens
+  * @param firstMatchTok
+  *          Position of the first matched token
+  * @param errorMsg
+  *          String containing suggestion markup
+  * @return String Formatted message.
+  * @throws IOException
+  *           if formatting a match fails (synthesizer access).
+  **/
+ private String formatMatches(final AnalyzedTokenReadings[] tokenReadings,
+     final int[] positions, final int firstMatchTok, final String errorMsg)
+     throws IOException {
+   String errorMessage = errorMsg;
+   int matchCounter = 0;
+   final int[] numbersToMatches = new int[errorMsg.length()];
+   boolean newWay = false;
+   int ind = errorMessage.indexOf('\\');
+   // indexOf() reports "not found" as -1, so index 0 is a valid position.
+   boolean numberFollows = ind >= 0 && ind < errorMessage.length() - 1
+       && StringTools.isPositiveNumber(errorMessage.charAt(ind + 1));
+   while (numberFollows) {
+     // Length of the reference: the backslash plus all digits following it.
+     int numLen = 1;
+     while (ind + numLen < errorMessage.length()
+         && StringTools.isPositiveNumber(errorMessage.charAt(ind + numLen))) {
+       numLen++;
+     }
+     final int j = Integer.parseInt(errorMessage.substring(ind + 1, ind
+         + numLen)) - 1;
+     int repTokenPos = 0;
+     for (int l = 0; l <= j; l++) {
+       repTokenPos += positions[l];
+     }
+     int nextTokenPos = 0;
+     // Only read the following position when it really exists.
+     if (j + 1 < positions.length) {
+       nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1];
+     }
+     if (suggestionMatches != null) {
+       if (matchCounter < suggestionMatches.size()) {
+         numbersToMatches[j] = matchCounter;
+         if (suggestionMatches.get(matchCounter) != null) {
+           final String[] matches = concatMatches(matchCounter, j,
+               firstMatchTok + repTokenPos, tokenReadings, nextTokenPos);
+           final String leftSide = errorMessage.substring(0, ind);
+           final String rightSide = errorMessage.substring(ind + numLen);
+           if (matches.length == 1) {
+             errorMessage = leftSide + matches[0] + rightSide;
+           } else {
+             errorMessage = formatMultipleSynthesis(matches, leftSide,
+                 rightSide);
+           }
+           matchCounter++;
+           newWay = true;
+         }
+       } else {
+         // FIXME: is this correct? this is how we deal with multiple matches
+         suggestionMatches.add(suggestionMatches.get(numbersToMatches[j]));
+       }
+     }
+
+     if (!newWay) {
+       // in case <match> elements weren't used (yet)
+       errorMessage = errorMessage.replace("\\" + (j + 1),
+           tokenReadings[firstMatchTok + repTokenPos - 1].getToken());
+     }
+     ind = errorMessage.indexOf('\\');
+     numberFollows = ind >= 0 && ind < errorMessage.length() - 1
+         && StringTools.isPositiveNumber(errorMessage.charAt(ind + 1));
+   }
+   return errorMessage;
+ }
+
+ /**
+  * Inserts several alternative forms at the place of one back reference.
+  * <p>
+  * When the reference lies inside an open suggestion (the last opening
+  * suggestion tag on the left is not followed by a closing tag), the whole
+  * suggestion is repeated once per form: the suggestion text before and after
+  * the reference is copied around every form and the copies are separated by
+  * a closing tag, ", " and an opening tag. Outside a suggestion the forms are
+  * simply written one after another between the two sides.
+  *
+  * @param matches the alternative forms.
+  * @param leftSide message text before the back reference.
+  * @param rightSide message text after the back reference.
+  * @return the message with all forms inserted.
+  */
+ private static String formatMultipleSynthesis(final String[] matches,
+     final String leftSide, final String rightSide) {
+   final int sugStart = leftSide.lastIndexOf(SUGG_TAG);
+   // lastIndexOf() yields -1 for "absent", so a tag at index 0 is valid; the
+   // suggestion is open only if no closing tag follows its opening tag.
+   final boolean insideSuggestion = sugStart >= 0
+       && leftSide.lastIndexOf(END_SUGG_TAG) < sugStart;
+   String prefix = leftSide;
+   String suggestionLeft = "";
+   String suggestionRight = "";
+   String suffix = rightSide;
+   if (insideSuggestion) {
+     final int textStart = sugStart + SUGG_TAG.length();
+     prefix = leftSide.substring(0, textStart);
+     suggestionLeft = leftSide.substring(textStart);
+     final int sugEnd = rightSide.indexOf(END_SUGG_TAG);
+     if (sugEnd > 0) {
+       suggestionRight = rightSide.substring(0, sugEnd);
+       suffix = rightSide.substring(sugEnd);
+     }
+   }
+   final StringBuilder sb = new StringBuilder();
+   sb.append(prefix);
+   for (int z = 0; z < matches.length; z++) {
+     sb.append(suggestionLeft);
+     sb.append(matches[z]);
+     sb.append(suggestionRight);
+     if (insideSuggestion && z < matches.length - 1) {
+       sb.append(END_SUGG_TAG);
+       sb.append(", ");
+       sb.append(SUGG_TAG);
+     }
+   }
+   sb.append(suffix);
+   return sb.toString();
+ }
+
+ /**
+ * For testing only.
+ *
+ * @return the list of pattern elements held by this rule (the list itself,
+ * not a copy).
+ */
+ public final List<Element> getElements() {
+ return patternElements;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRuleLoader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRuleLoader.java
new file mode 100644
index 0000000..8156a6e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/PatternRuleLoader.java
@@ -0,0 +1,369 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+
+/**
+ * Loads {@link PatternRule}s from an XML file.
+ *
+ * @author Daniel Naber
+ */
+public class PatternRuleLoader extends DefaultHandler {
+
+  /**
+   * Parses the given XML stream with a {@link PatternRuleHandler} and returns
+   * the rules the handler collected. Loading of external DTDs is switched off
+   * on the XML reader before parsing.
+   *
+   * @param is stream with the XML rule definitions
+   * @param filename name of the source, used in the error message only
+   * @return the rules collected by the handler
+   * @throws IOException if the parser cannot be configured or parsing fails;
+   *         the original exception is attached as the cause
+   */
+  public final List<PatternRule> getRules(final InputStream is,
+      final String filename) throws IOException {
+    try {
+      final PatternRuleHandler handler = new PatternRuleHandler();
+      final SAXParserFactory factory = SAXParserFactory.newInstance();
+      final SAXParser saxParser = factory.newSAXParser();
+      saxParser.getXMLReader().setFeature(
+          "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+          false);
+      saxParser.parse(is, handler);
+      return handler.getRules();
+    } catch (final Exception e) {
+      final IOException ioe = new IOException("Cannot load or parse '"
+          + filename + "'");
+      ioe.initCause(e);
+      throw ioe;
+    }
+  }
+
+  /**
+   * Testing only: loads "/de/grammar.xml" through the data broker and prints
+   * the resulting rule list.
+   * <p>
+   * Declared static so that it can be used as a program entry point.
+   *
+   * @param args ignored
+   * @throws IOException if the rule file cannot be loaded or parsed
+   */
+  public static void main(final String[] args) throws IOException {
+    final PatternRuleLoader prg = new PatternRuleLoader();
+    final String name = "/de/grammar.xml";
+    final List<PatternRule> l = prg.getRules(JLanguageTool.getDataBroker().getFromRulesDirAsStream(name), name);
+    System.out.println(l);
+  }
+
+}
+
+class PatternRuleHandler extends XMLRuleHandler {
+
+ private int subId;
+
+ private boolean defaultOff;
+ private boolean defaultOn;
+
+ private Category category;
+ private String description;
+ private String ruleGroupDescription;
+
+ // ===========================================================
+ // SAX DocumentHandler methods
+ // ===========================================================
+
+ /**
+  * Handles an opening tag of the rule file by updating the parser state for
+  * the element named <code>qName</code>. Elements that match none of the
+  * branches are ignored.
+  * <p>
+  * An <code>example</code> element without a <code>type</code> attribute is
+  * ignored rather than causing a NullPointerException.
+  *
+  * @param namespaceURI not used
+  * @param lName not used
+  * @param qName qualified name of the element being opened
+  * @param attrs attributes of the element
+  * @throws SAXException if the <code>rules</code> element names an unknown
+  *         language, or if a delegated helper throws it
+  */
+ @Override
+ public void startElement(final String namespaceURI, final String lName,
+     final String qName, final Attributes attrs) throws SAXException {
+   if ("category".equals(qName)) {
+     final String catName = attrs.getValue("name");
+     final String priorityStr = attrs.getValue("priority");
+     if (priorityStr == null) {
+       category = new Category(catName);
+     } else {
+       category = new Category(catName, Integer.parseInt(priorityStr));
+     }
+
+     if ("off".equals(attrs.getValue(DEFAULT))) {
+       category.setDefaultOff();
+     }
+
+   } else if ("rules".equals(qName)) {
+     final String languageStr = attrs.getValue("lang");
+     language = Language.getLanguageForShortName(languageStr);
+     if (language == null) {
+       throw new SAXException("Unknown language '" + languageStr + "'");
+     }
+   } else if ("rule".equals(qName)) {
+     id = attrs.getValue("id");
+     if (inRuleGroup) {
+       subId++;
+     }
+     // Inside a rule group a flag that is already set is kept;
+     // otherwise the rule's own "default" attribute decides.
+     if (!(inRuleGroup && defaultOff)) {
+       defaultOff = "off".equals(attrs.getValue(DEFAULT));
+     }
+
+     if (!(inRuleGroup && defaultOn)) {
+       defaultOn = "on".equals(attrs.getValue(DEFAULT));
+     }
+     if (inRuleGroup && id == null) {
+       id = ruleGroupId;
+     }
+     description = attrs.getValue("name");
+     if (inRuleGroup && description == null) {
+       description = ruleGroupDescription;
+     }
+     correctExamples = new ArrayList<String>();
+     incorrectExamples = new ArrayList<IncorrectExample>();
+     if (suggestionMatches != null) {
+       suggestionMatches.clear();
+     }
+   } else if (PATTERN.equals(qName)) {
+     startPattern(attrs);
+   } else if (AND.equals(qName)) {
+     inAndGroup = true;
+   } else if ("unify".equals(qName)) {
+     inUnification = true;
+     uniNegation = YES.equals(attrs.getValue(NEGATE));
+   } else if ("feature".equals(qName)) {
+     uFeature = attrs.getValue("id");
+   } else if (TYPE.equals(qName)) {
+     uType = attrs.getValue("id");
+     uTypeList.add(uType);
+   } else if (TOKEN.equals(qName)) {
+     setToken(attrs);
+   } else if (EXCEPTION.equals(qName)) {
+     setExceptions(attrs);
+   } else if (EXAMPLE.equals(qName)
+       && "correct".equals(attrs.getValue(TYPE))) {
+     // Constant-first comparison: getValue() returns null when the
+     // attribute is missing.
+     inCorrectExample = true;
+     correctExample = new StringBuilder();
+   } else if (EXAMPLE.equals(qName)
+       && "incorrect".equals(attrs.getValue(TYPE))) {
+     inIncorrectExample = true;
+     incorrectExample = new StringBuilder();
+     exampleCorrection = new StringBuilder();
+     if (attrs.getValue("correction") != null) {
+       exampleCorrection.append(attrs.getValue("correction"));
+     }
+   } else if ("message".equals(qName)) {
+     inMessage = true;
+     inSuggestion = false;
+     message = new StringBuilder();
+   } else if ("short".equals(qName)) {
+     inShortMessage = true;
+     shortMessage = new StringBuilder();
+   } else if ("rulegroup".equals(qName)) {
+     ruleGroupId = attrs.getValue("id");
+     ruleGroupDescription = attrs.getValue("name");
+     defaultOff = "off".equals(attrs.getValue(DEFAULT));
+     defaultOn = "on".equals(attrs.getValue(DEFAULT));
+     inRuleGroup = true;
+     subId = 0;
+   } else if ("suggestion".equals(qName) && inMessage) {
+     message.append("<suggestion>");
+     inSuggestion = true;
+   } else if ("match".equals(qName)) {
+     setMatchElement(attrs);
+   } else if (MARKER.equals(qName) && inCorrectExample) {
+     correctExample.append("<marker>");
+   } else if (MARKER.equals(qName) && inIncorrectExample) {
+     incorrectExample.append("<marker>");
+   } else if (UNIFICATION.equals(qName)) {
+     uFeature = attrs.getValue("feature");
+     inUnificationDef = true;
+   } else if ("equivalence".equals(qName)) {
+     uType = attrs.getValue(TYPE);
+   } else if (PHRASES.equals(qName)) {
+     inPhrases = true;
+   } else if ("includephrases".equals(qName)) {
+     phraseElementInit();
+   } else if ("phrase".equals(qName) && inPhrases) {
+     phraseId = attrs.getValue("id");
+   } else if ("phraseref".equals(qName) && (attrs.getValue("idref") != null)) {
+     preparePhrase(attrs);
+   }
+ }
+
+ /**
+ * Handles a closing tag of the rule file. Closing a <code>rule</code> element
+ * creates the {@link PatternRule} object(s) and adds them to
+ * <code>rules</code>; the other branches finish or reset the state that the
+ * matching opening tag set up.
+ *
+ * @param namespaceURI not used
+ * @param sName not used
+ * @param qName qualified name of the element being closed
+ */
+ @Override
+ public void endElement(final String namespaceURI, final String sName,
+ final String qName) throws SAXException {
+ if ("rule".equals(qName)) {
+ phraseElementInit();
+ if (phraseElementList.isEmpty()) {
+ // no phrase references: a single rule built from elementList
+ final PatternRule rule = new PatternRule(id, language, elementList,
+ description, message.toString(), shortMessage.toString());
+ prepareRule(rule);
+ rules.add(rule);
+ } else {
+ // append the tokens collected in elementList to every phrase variant
+ if (!elementList.isEmpty()) {
+ for (final ArrayList<Element> ph : phraseElementList) {
+ ph.addAll(new ArrayList<Element>(elementList));
+ }
+ }
+
+ // one rule per phrase variant; the last constructor argument is true
+ // when there is more than one variant
+ for (final ArrayList<Element> phraseElement : phraseElementList) {
+ processElement(phraseElement);
+ final PatternRule rule = new PatternRule(id, language, phraseElement,
+ description, message.toString(), shortMessage.toString(),
+ phraseElementList.size() > 1);
+ prepareRule(rule);
+ rules.add(rule);
+ }
+ }
+ elementList.clear();
+ // phraseElementList cannot be null here: phraseElementInit() ran above
+ if (phraseElementList != null) {
+ phraseElementList.clear();
+ }
+
+ } else if (qName.equals(EXCEPTION)) {
+ finalizeExceptions();
+ } else if (qName.equals(AND)) {
+ inAndGroup = false;
+ andGroupCounter = 0;
+ tokenCounter++;
+ } else if (qName.equals(TOKEN)) {
+ finalizeTokens();
+ } else if (qName.equals(PATTERN)) {
+ checkMarkPositions();
+ inPattern = false;
+ if (lastPhrase) {
+ elementList.clear();
+ }
+ if (phraseElementList == null || phraseElementList.isEmpty()) {
+ checkPositions(0);
+ } else {
+ for (List<Element> elements : phraseElementList) {
+ checkPositions(elements.size());
+ }
+ }
+ tokenCounter = 0;
+ } else if (qName.equals(EXAMPLE)) {
+ if (inCorrectExample) {
+ correctExamples.add(correctExample.toString());
+ } else if (inIncorrectExample) {
+ IncorrectExample example = null;
+ // the "correction" attribute may list several corrections separated by '|'
+ final String[] corrections = exampleCorrection.toString().split("\\|");
+ if (corrections.length > 0 && corrections[0].length() > 0) {
+ example = new IncorrectExample(incorrectExample.toString(),
+ corrections);
+ } else {
+ example = new IncorrectExample(incorrectExample.toString());
+ }
+ incorrectExamples.add(example);
+ }
+ // reset the example state whether or not an example was recorded
+ inCorrectExample = false;
+ inIncorrectExample = false;
+ correctExample = new StringBuilder();
+ incorrectExample = new StringBuilder();
+ exampleCorrection = new StringBuilder();
+ } else if ("message".equals(qName)) {
+ suggestionMatches = addLegacyMatches();
+ inMessage = false;
+ } else if ("short".equals(qName)) {
+ inShortMessage = false;
+ } else if ("match".equals(qName)) {
+ // the text collected in "match" becomes the lemma string of the most
+ // recent suggestion match (inside a message) or of the token reference
+ if (inMessage) {
+ suggestionMatches.get(suggestionMatches.size() - 1).setLemmaString(
+ match.toString());
+ } else if (inToken) {
+ tokenReference.setLemmaString(match.toString());
+ }
+ inMatch = false;
+ } else if ("rulegroup".equals(qName)) {
+ inRuleGroup = false;
+ } else if ("suggestion".equals(qName) && inMessage) {
+ message.append("</suggestion>");
+ inSuggestion = false;
+ } else if (qName.equals(MARKER) && inCorrectExample) {
+ correctExample.append("</marker>");
+ } else if (qName.equals(MARKER) && inIncorrectExample) {
+ incorrectExample.append("</marker>");
+ } else if ("phrase".equals(qName) && inPhrases) {
+ finalizePhrase();
+ } else if ("includephrases".equals(qName)) {
+ elementList.clear();
+ } else if (PHRASES.equals(qName) && inPhrases) {
+ inPhrases = false;
+ } else if (UNIFICATION.equals(qName)) {
+ inUnificationDef = false;
+ } else if ("feature".equals(qName)) {
+ // store the types collected for this feature and start a fresh list
+ equivalenceFeatures.put(uFeature, uTypeList);
+ uTypeList = new ArrayList<String>();
+ } else if ("unify".equals(qName)) {
+ inUnification = false;
+ //clear the features...
+ equivalenceFeatures = new HashMap<String, List<String>>();
+ }
+ }
+
+ private void prepareRule(final PatternRule rule) {
+ rule.setStartPositionCorrection(startPositionCorrection);
+ rule.setEndPositionCorrection(endPositionCorrection);
+ startPositionCorrection = 0;
+ endPositionCorrection = 0;
+ rule.setCorrectExamples(correctExamples);
+ rule.setIncorrectExamples(incorrectExamples);
+ rule.setCategory(category);
+ if (inRuleGroup) {
+ rule.setSubId(Integer.toString(subId));
+ }
+ else {
+ rule.setSubId("1");
+ }
+ caseSensitive = false;
+ if (suggestionMatches != null) {
+ for (final Match m : suggestionMatches) {
+ rule.addSuggestionMatch(m);
+ }
+ if (phraseElementList.size() <= 1) {
+ suggestionMatches.clear();
+ }
+ }
+ if (defaultOff) {
+ rule.setDefaultOff();
+ }
+
+ if (category.isDefaultOff() && !defaultOn) {
+ rule.setDefaultOff();
+ }
+
+ }
+
+ /**
+  * Appends character data to the buffer of the innermost context the parser
+  * is currently in. The flags are checked in a fixed order (exception, token,
+  * correct example, incorrect example, match, message, short message); text
+  * outside all of these contexts is dropped.
+  */
+ @Override
+ public void characters(final char[] buf, final int offset, final int len) {
+   final String text = new String(buf, offset, len);
+   final StringBuilder target;
+   if (inException) {
+     target = exceptions;
+   } else if (inToken) {
+     target = elements;
+   } else if (inCorrectExample) {
+     target = correctExample;
+   } else if (inIncorrectExample) {
+     target = incorrectExample;
+   } else if (inMatch) {
+     target = match;
+   } else if (inMessage) {
+     target = message;
+   } else if (inShortMessage) {
+     target = shortMessage;
+   } else {
+     return;
+   }
+   target.append(text);
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java
new file mode 100644
index 0000000..7fbb35d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java
@@ -0,0 +1,432 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+
+/**
+ * Implements unification of features over tokens.
+ *
+ * @author Marcin Milkowski
+ */
+public class Unifier {
+
+ //TODO: add a possibility to negate some features but not all
+ /**
+ * Negates the meaning of unification just like negation in Element tokens.
+ */
+ private boolean negation;
+
+ private boolean allFeatsIn;
+
+ private int tokCnt;
+
+ private int readingsCounter;
+
+ private final List<AnalyzedTokenReadings> tokSequence;
+
+ /**
+ * A Map for storing the equivalence types for features. Each key is a
+ * (feature, type) pair of Strings wrapped in an EquivalenceTypeLocator; the
+ * value is the Element used to test a token for that type.
+ */
+ private final Map<EquivalenceTypeLocator, Element> equivalenceTypes;
+
+ /**
+ * A Map that stores all possible equivalence types listed for features.
+ */
+ private final Map<String, List<String>> equivalenceFeatures;
+
+ /**
+ * List of maps, one per token reading checked before unification starts,
+ * each mapping a feature name to the set of equivalence types matched.
+ */
+ private final List<Map<String, Set<String>>> equivalencesMatched;
+
+ /**
+ * Marks found interpretations in subsequent tokens.
+ */
+ private List<Boolean> featuresFound;
+
+ /**
+ * For checking the current token.
+ */
+ private List<Boolean> tmpFeaturesFound;
+
+ /**
+ * Internal flag for checking whether the first token in tokSequence has to be
+ * yet unified.
+ */
+ private boolean firstUnified;
+
+ private boolean inUnification;
+ private boolean uniMatched;
+ private boolean uniAllMatched;
+ private AnalyzedTokenReadings[] unifiedTokens;
+
+ /**
+ * Instantiates the unifier with empty collections. The counters start at
+ * the same values that {@link #reset()} restores: <code>tokCnt</code> at -1
+ * and <code>readingsCounter</code> at 1.
+ */
+ public Unifier() {
+ tokCnt = -1;
+ readingsCounter = 1;
+ equivalencesMatched = new ArrayList<Map<String, Set<String>>>();
+ equivalenceTypes = new HashMap<EquivalenceTypeLocator, Element>();
+ equivalenceFeatures = new HashMap<String, List<String>>();
+ featuresFound = new ArrayList<Boolean>();
+ tmpFeaturesFound = new ArrayList<Boolean>();
+ tokSequence = new ArrayList<AnalyzedTokenReadings>();
+ }
+
+ /**
+  * Registers an equivalence type for a feature. Each type is described by an
+  * {@link Element}; a (feature, type) pair that is already registered is left
+  * untouched.
+  *
+  * @param feature
+  *          Feature to be tested, like gender, grammatical case or number.
+  * @param type
+  *          Type of equivalence for the feature, for example plural, first
+  *          person, genitive.
+  * @param elem
+  *          Element specifying the equivalence.
+  */
+ public final void setEquivalence(final String feature, final String type,
+     final Element elem) {
+   final EquivalenceTypeLocator key = new EquivalenceTypeLocator(feature, type);
+   if (equivalenceTypes.containsKey(key)) {
+     return;
+   }
+   equivalenceTypes.put(key, elem);
+   // remember the type name in the per-feature list, creating it on first use
+   List<String> knownTypes = equivalenceFeatures.get(feature);
+   if (knownTypes == null) {
+     knownTypes = new ArrayList<String>();
+     equivalenceFeatures.put(feature, knownTypes);
+   }
+   knownTypes.add(type);
+ }
+
+ /**
+  * Tests if a token has shared features with other tokens.
+  * <p>
+  * Before {@link #startUnify()} has been called, every call records which
+  * equivalence types the token matches. Afterwards the token is checked only
+  * against the previously recorded types (see <code>checkNext</code>).
+  *
+  * @param aToken
+  *          token to be tested
+  * @param uFeatures
+  *          features to be tested, each mapped to the equivalence types to
+  *          check; a null or empty type list means all types registered for
+  *          that feature
+  * @return true if the token shares the features with other tokens (inverted
+  *         when negation is set); false if <code>uFeatures</code> is null or
+  *         refers to a feature or type that was never registered
+  */
+ protected final boolean isSatisfied(final AnalyzedToken aToken,
+     final Map<String, List<String>> uFeatures) {
+
+   if (allFeatsIn && equivalencesMatched.isEmpty()) {
+     return false;
+   }
+   // Error: no feature given!
+   if (uFeatures == null) {
+     return false; // throw exception??
+   }
+   if (allFeatsIn) {
+     return checkNext(aToken, uFeatures) ^ negation;
+   }
+
+   tokCnt++;
+   while (equivalencesMatched.size() <= tokCnt) {
+     equivalencesMatched.add(new HashMap<String, Set<String>>());
+   }
+   final Map<String, Set<String>> matched = equivalencesMatched.get(tokCnt);
+   boolean unified = true;
+   for (final Map.Entry<String, List<String>> feat : uFeatures.entrySet()) {
+     final String feature = feat.getKey();
+     List<String> types = feat.getValue();
+     if (types == null || types.isEmpty()) {
+       types = equivalenceFeatures.get(feature);
+     }
+     if (types == null) {
+       // unknown feature: treated like an unknown type below instead of
+       // failing with a NullPointerException in the loop
+       return false;
+     }
+     for (final String typename : types) {
+       final Element testElem = equivalenceTypes
+           .get(new EquivalenceTypeLocator(feature, typename));
+       if (testElem == null) {
+         return false;
+       }
+       if (testElem.isMatched(aToken)) {
+         Set<String> typeSet = matched.get(feature);
+         if (typeSet == null) {
+           typeSet = new HashSet<String>();
+           matched.put(feature, typeSet);
+         }
+         typeSet.add(typename);
+       }
+     }
+     // every requested feature needs at least one matching type
+     if (!matched.containsKey(feature)) {
+       unified = false;
+       break;
+     }
+   }
+   if (unified) {
+     if (tokCnt == 0 || tokSequence.isEmpty()) {
+       tokSequence.add(new AnalyzedTokenReadings(aToken, 0));
+     } else {
+       tokSequence.get(0).addReading(aToken);
+     }
+   }
+   return unified ^ negation;
+ }
+
+ /**
+ * Checks a reading of a later token against the equivalences recorded in
+ * <code>equivalencesMatched</code>. Entry <code>i</code> counts as unified
+ * when, for every requested feature, the token matches at least one type
+ * that was recorded for entry <code>i</code> and that entry is still marked
+ * in <code>featuresFound</code>. If any entry unifies, the token is added to
+ * <code>tokSequence</code> and <code>tmpFeaturesFound</code> is updated.
+ * Does nothing and returns true unless <code>allFeatsIn</code> is set.
+ *
+ * @param aToken token reading to check
+ * @param uFeatures features (and optionally types) to unify over
+ * @return true if the token unifies with at least one recorded entry
+ */
+ private boolean checkNext(final AnalyzedToken aToken,
+ final Map<String, List<String>> uFeatures) {
+ boolean unifiedNext = true;
+ boolean anyFeatUnified = false;
+ List<String> types;
+ // work on a copy; it replaces tmpFeaturesFound only if the token unifies
+ ArrayList<Boolean> tokenFeaturesFound = new ArrayList<Boolean>(tmpFeaturesFound);
+ if (allFeatsIn) {
+ for (int i = 0; i <= tokCnt; i++) {
+ boolean allFeatsUnified = true;
+ for (Map.Entry<String, List<String>> feat : uFeatures.entrySet()) {
+ boolean featUnified = false;
+ types = feat.getValue();
+ // no explicit types given: use all types registered for the feature
+ if (types == null || types.isEmpty()) {
+ types = equivalenceFeatures.get(feat.getKey());
+ }
+ for (final String typename : types) {
+ if (featuresFound.get(i)
+ && equivalencesMatched.get(i).containsKey(feat.getKey())
+ && equivalencesMatched.get(i).get(feat.getKey()).contains(typename)) {
+ final Element testElem = equivalenceTypes
+ .get(new EquivalenceTypeLocator(feat.getKey(), typename));
+ featUnified = featUnified || testElem.isMatched(aToken);
+ }
+ }
+ allFeatsUnified &= featUnified;
+ }
+ tokenFeaturesFound.set(i, allFeatsUnified);
+ anyFeatUnified = anyFeatUnified || allFeatsUnified;
+ }
+ unifiedNext &= anyFeatUnified;
+ if (unifiedNext) {
+ // first unified reading of this token opens a new entry in tokSequence
+ if (tokSequence.size() == readingsCounter) {
+ tokSequence.add(new AnalyzedTokenReadings(aToken, 0));
+ } else {
+ tokSequence.get(readingsCounter).addReading(aToken);
+ }
+ tmpFeaturesFound = tokenFeaturesFound;
+ }
+ }
+ return unifiedNext;
+ }
+
+ /**
+ * Call after every complete token (AnalyzedTokenReadings) checked. Copies
+ * <code>tmpFeaturesFound</code> into <code>featuresFound</code> and advances
+ * <code>readingsCounter</code>.
+ */
+ public final void startNextToken() {
+ featuresFound = new ArrayList<Boolean>(tmpFeaturesFound);
+ readingsCounter++;
+ }
+
+ /**
+  * Switches the unifier to testing only those equivalences that were matched
+  * so far. One <code>true</code> flag is appended to
+  * <code>featuresFound</code> for each of the <code>tokCnt + 1</code> entries
+  * recorded, and <code>tmpFeaturesFound</code> becomes a copy of the result.
+  */
+ public final void startUnify() {
+   allFeatsIn = true;
+   for (int remaining = tokCnt + 1; remaining > 0; remaining--) {
+     featuresFound.add(Boolean.TRUE);
+   }
+   tmpFeaturesFound = new ArrayList<Boolean>(featuresFound);
+ }
+
+ /**
+ * Sets the negation flag; while it is set, the result of
+ * <code>isSatisfied</code> is inverted.
+ *
+ * @param neg true to negate the unification result
+ */
+ public final void setNegation(final boolean neg) {
+ negation = neg;
+ }
+
+ /**
+ * @return the current value of the negation flag
+ */
+ public final boolean getNegation() {
+ return negation;
+ }
+
+ /**
+  * Resets after use of unification. Required. Empties all collected state
+  * and restores counters and flags to the values set at construction.
+  */
+ public final void reset() {
+   // collected data
+   equivalencesMatched.clear();
+   featuresFound.clear();
+   tmpFeaturesFound.clear();
+   tokSequence.clear();
+   // counters
+   tokCnt = -1;
+   readingsCounter = 1;
+   // flags
+   allFeatsIn = false;
+   negation = false;
+   firstUnified = false;
+   inUnification = false;
+   uniMatched = false;
+   uniAllMatched = false;
+ }
+
+ /**
+ * Gets a full sequence of filtered tokens. On the first call after a reset
+ * the readings of the first token are filtered down to those still flagged
+ * in <code>tmpFeaturesFound</code>.
+ *
+ * @return Array of AnalyzedTokenReadings that match equivalence relation
+ * defined for features tested, or null if the sequence is empty or no
+ * reading is flagged.
+ */
+ public final AnalyzedTokenReadings[] getUnifiedTokens() {
+ if (tokSequence.isEmpty()) {
+ return null;
+ }
+ if (!firstUnified) {
+ AnalyzedTokenReadings tmpATR;
+ int first = 0;
+ // temporary sentinel: guarantees that the scan below terminates
+ tmpFeaturesFound.add(true); // Bentley's search idea
+ while (!tmpFeaturesFound.get(first)) {
+ first++;
+ }
+ tmpFeaturesFound.remove(tmpFeaturesFound.size() - 1);
+ // only the sentinel was true: nothing unified
+ if (first >= tmpFeaturesFound.size()) {
+ return null;
+ }
+ // FIXME: why this happens??
+ final int numRead = tokSequence.get(0).getReadingsLength();
+ if (first < numRead) {
+ // rebuild the first token from the flagged readings only
+ tmpATR = new AnalyzedTokenReadings(tokSequence.get(0).getAnalyzedToken(
+ first), 0);
+ for (int i = first + 1; i <= Math.min(numRead - 1, tokCnt); i++) {
+ if (tmpFeaturesFound.get(i)) {
+ tmpATR.addReading(tokSequence.get(0).getAnalyzedToken(i));
+ }
+ }
+ tokSequence.set(0, tmpATR);
+ }
+ firstUnified = true;
+ }
+ final AnalyzedTokenReadings[] atr = tokSequence
+ .toArray(new AnalyzedTokenReadings[tokSequence.size()]);
+ return atr;
+ }
+
+ /**
+ * Tests if the token sequence is unified.
+ * <p>
+ * Until the last reading of the first token has been seen, each reading is
+ * only recorded and true is returned. From then on the result tells whether
+ * any reading of the current token seen so far has unified.
+ *
+ * @param matchToken
+ * AnalyzedToken token to unify
+ * @param uFeatures
+ * features to unify over, mapped to the value types of each feature
+ * @param isUniNegated
+ * if true, then return negated result; only consulted for readings
+ * of the first token
+ * @param lastReading
+ * true when the matchToken is the last reading in the
+ * AnalyzedReadings
+ * @return True if the tokens in the sequence are unified.
+ */
+ public final boolean isUnified(final AnalyzedToken matchToken,
+ final Map<String, List<String>> uFeatures, final boolean isUniNegated,
+ final boolean lastReading) {
+ if (inUnification) {
+ uniMatched |= isSatisfied(matchToken, uFeatures);
+ uniAllMatched = uniMatched;
+ if (lastReading) {
+ // token complete: store the unified sequence and start a new token
+ startNextToken();
+ unifiedTokens = getUnifiedTokens();
+ uniMatched = false;
+ }
+ return uniAllMatched;
+ }
+ if (isUniNegated) {
+ setNegation(true);
+ }
+ // first token: only record the matched equivalences, result is ignored
+ isSatisfied(matchToken, uFeatures);
+ if (lastReading) {
+ inUnification = true;
+ uniMatched = false;
+ startUnify();
+ }
+ return true;
+ }
+
+ /**
+  * Returns the unified sequence stored by {@link #isUnified} when that simple
+  * test method was used.
+  *
+  * @return An array of {@link AnalyzedTokenReadings}, or null when the
+  *         unifier is not in unification mode
+  */
+ public final AnalyzedTokenReadings[] getFinalUnified() {
+   return inUnification ? unifiedTokens : null;
+ }
+}
+
+/**
+ * Immutable (feature, type) pair used as a map key. Two locators are equal
+ * when both strings are equal; either string may be null.
+ */
+class EquivalenceTypeLocator {
+
+  private final String feature;
+  private final String type;
+
+  EquivalenceTypeLocator(final String feature, final String type) {
+    this.feature = feature;
+    this.type = type;
+  }
+
+  @Override
+  public int hashCode() {
+    final int featureHash = (feature == null) ? 0 : feature.hashCode();
+    final int typeHash = (type == null) ? 0 : type.hashCode();
+    return 31 * (31 + featureHash) + typeHash;
+  }
+
+  @Override
+  public boolean equals(final Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    final EquivalenceTypeLocator other = (EquivalenceTypeLocator) obj;
+    return sameText(feature, other.feature) && sameText(type, other.type);
+  }
+
+  /** Null-safe string comparison: two nulls are equal. */
+  private static boolean sameText(final String a, final String b) {
+    return (a == null) ? (b == null) : a.equals(b);
+  }
+
+} \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/XMLRuleHandler.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/XMLRuleHandler.java
new file mode 100644
index 0000000..72a852a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/XMLRuleHandler.java
@@ -0,0 +1,568 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * XML rule handler that loads rules from XML and throws
+ * exceptions on errors and warnings.
+ *
+ * @author Daniel Naber
+ */
+public class XMLRuleHandler extends DefaultHandler {
+
+ /**
+ * Creates a handler with an empty element list, an empty feature map and
+ * an empty type list. The phrase collections are created lazily later.
+ */
+ public XMLRuleHandler() {
+ elementList = new ArrayList<Element>();
+ equivalenceFeatures = new HashMap<String, List<String>>();
+ uTypeList = new ArrayList<String>();
+ }
+
+ List<PatternRule> rules = new ArrayList<PatternRule>();
+
+ protected Language language;
+
+ protected StringBuilder correctExample = new StringBuilder();
+ protected StringBuilder incorrectExample = new StringBuilder();
+ protected StringBuilder exampleCorrection = new StringBuilder();
+ protected StringBuilder message = new StringBuilder();
+ protected StringBuilder match = new StringBuilder();
+ protected StringBuilder elements;
+ protected StringBuilder exceptions;
+
+ List<String> correctExamples = new ArrayList<String>();
+ List<IncorrectExample> incorrectExamples = new ArrayList<IncorrectExample>();
+
+ protected boolean inPattern;
+ protected boolean inCorrectExample;
+ protected boolean inIncorrectExample;
+ protected boolean inMessage;
+ protected boolean inSuggestion;
+ protected boolean inMatch;
+ protected boolean inRuleGroup;
+ protected boolean inToken;
+ protected boolean inException;
+ protected boolean inPhrases;
+ protected boolean inAndGroup;
+
+ protected boolean tokenSpaceBefore;
+ protected boolean tokenSpaceBeforeSet;
+ protected String posToken;
+ protected boolean posNegation;
+ protected boolean posRegExp;
+
+ protected boolean caseSensitive;
+ protected boolean regExpression;
+ protected boolean tokenNegated;
+ protected boolean tokenInflected;
+
+ protected String exceptionPosToken;
+ protected boolean exceptionStringRegExp;
+ protected boolean exceptionStringNegation;
+ protected boolean exceptionStringInflected;
+ protected boolean exceptionPosNegation;
+ protected boolean exceptionPosRegExp;
+ protected boolean exceptionValidNext;
+ protected boolean exceptionValidPrev;
+ protected boolean exceptionSet;
+ protected boolean exceptionSpaceBefore;
+ protected boolean exceptionSpaceBeforeSet;
+
+ /** List of elements as specified by tokens. **/
+ protected List<Element> elementList;
+
+ /** true when phraseref is the last element in the rule. **/
+ protected boolean lastPhrase;
+
+ /** ID reference to the phrase. **/
+ protected String phraseIdRef;
+
+ /** Current phrase ID. **/
+ protected String phraseId;
+
+ protected int skipPos;
+
+ protected String ruleGroupId;
+
+ protected String id;
+
+ protected Element tokenElement;
+
+ protected Match tokenReference;
+
+ protected List<Match> suggestionMatches;
+
+ protected Locator pLocator;
+
+ protected int startPositionCorrection;
+ protected int endPositionCorrection;
+ protected int tokenCounter;
+
+ /** Phrase store - elementLists keyed by phraseIds. **/
+ protected Map<String, List<List<Element>>> phraseMap;
+
+ /**
+ * Logically forking element list, used for including multiple phrases in the
+ * current one.
+ **/
+ protected List<ArrayList<Element>> phraseElementList;
+
+ protected int andGroupCounter;
+
+ protected StringBuilder shortMessage = new StringBuilder();
+ protected boolean inShortMessage;
+
+ protected boolean inUnification;
+ protected boolean inUnificationDef;
+ protected boolean uniNegation;
+
+ protected String uFeature;
+ protected String uType = "";
+
+ protected List<String> uTypeList;
+
+ protected Map<String, List<String>> equivalenceFeatures;
+
+
+ /** Definitions of values in XML files. */
+ protected static final String YES = "yes";
+ protected static final String POSTAG = "postag";
+ protected static final String POSTAG_REGEXP = "postag_regexp";
+ protected static final String REGEXP = "regexp";
+ protected static final String NEGATE = "negate";
+ protected static final String INFLECTED = "inflected";
+ protected static final String NEGATE_POS = "negate_pos";
+ protected static final String MARKER = "marker";
+ protected static final String DEFAULT = "default";
+ protected static final String TYPE = "type";
+ protected static final String SPACEBEFORE = "spacebefore";
+ protected static final String EXAMPLE = "example";
+ protected static final String SCOPE = "scope";
+ protected static final String IGNORE = "ignore";
+ protected static final String SKIP = "skip";
+ protected static final String TOKEN = "token";
+ protected static final String FEATURE = "feature";
+ protected static final String UNIFY = "unify";
+ protected static final String AND = "and";
+ protected static final String EXCEPTION = "exception";
+ protected static final String CASE_SENSITIVE = "case_sensitive";
+ protected static final String PATTERN = "pattern";
+ protected static final String MATCH = "match";
+ protected static final String UNIFICATION = "unification";
+ protected static final String RULEGROUP = "rulegroup";
+ protected static final String NO = "no";
+ protected static final String MARK_TO = "mark_to";
+ protected static final String MARK_FROM = "mark_from";
+ protected static final String PHRASES = "phrases";
+ protected static final String MESSAGE = "message";
+
+
+ /**
+ * @return the list of rules collected so far (the internal list, not a copy)
+ */
+ public List<PatternRule> getRules() {
+ return rules;
+ }
+
+ /**
+  * Treats parser warnings as fatal by rethrowing them.
+  *
+  * @param e the warning reported by the parser
+  * @throws SAXException always; the exception passed in
+  */
+ @Override
+ public void warning(final SAXParseException e) throws SAXException {
+   throw e;
+ }
+
+ public void error (final SAXParseException e) throws SAXException {
+ throw e;
+ }
+
+ /**
+ * Keeps the locator in <code>pLocator</code> so that error messages can
+ * report line and column numbers, then passes it on to the superclass.
+ */
+ @Override
+ public void setDocumentLocator(final Locator locator) {
+ pLocator = locator;
+ super.setDocumentLocator(locator);
+ }
+
+ /**
+ * Clears the per-token parser flags (POS negation/regexp, in-token,
+ * space-before), all exception flags via {@link #resetException()}, the
+ * exception-set marker and the token reference.
+ */
+ protected void resetToken() {
+ posNegation = false;
+ posRegExp = false;
+ inToken = false;
+ tokenSpaceBefore = false;
+ tokenSpaceBeforeSet = false;
+
+ resetException();
+ exceptionSet = false;
+ tokenReference = null;
+ }
+
+ /**
+ * Sets all per-exception parser flags back to false. Note that
+ * <code>exceptionSet</code> and <code>exceptionPosToken</code> are not
+ * touched here.
+ */
+ protected void resetException() {
+ exceptionStringNegation = false;
+ exceptionStringInflected = false;
+ exceptionPosNegation = false;
+ exceptionPosRegExp = false;
+ exceptionStringRegExp = false;
+ exceptionValidNext = false;
+ exceptionValidPrev = false;
+ exceptionSpaceBefore = false;
+ exceptionSpaceBeforeSet = false;
+ }
+
+ /**
+ * Creates <code>phraseElementList</code> if it does not exist yet; an
+ * existing list is left unchanged.
+ */
+ protected void phraseElementInit() {
+ // lazy init
+ if (phraseElementList == null) {
+ phraseElementList = new ArrayList<ArrayList<Element>>();
+ }
+ }
+ /**
+  * Expands a phrase reference. For every element list stored under the
+  * referenced phrase id, a new list consisting of the tokens collected so far
+  * (<code>elementList</code>) followed by the phrase's elements is added to
+  * <code>phraseElementList</code>. Each phrase element is tagged with the
+  * phrase id. References to ids that have not been defined are ignored.
+  *
+  * @param attrs attributes of the <code>phraseref</code> element; the
+  *          <code>idref</code> attribute names the phrase
+  */
+ protected void preparePhrase(final Attributes attrs) {
+   phraseIdRef = attrs.getValue("idref");
+   // phraseMap is created lazily by finalizePhrase(), so it is still null
+   // when a reference is seen before any phrase has been defined.
+   if (phraseMap == null || !phraseMap.containsKey(phraseIdRef)) {
+     return;
+   }
+   phraseElementInit();
+   for (final List<Element> curPhrEl : phraseMap.get(phraseIdRef)) {
+     for (final Element e : curPhrEl) {
+       e.setPhraseName(phraseIdRef);
+     }
+     // copy, so that later changes to elementList do not leak into the variant
+     final ArrayList<Element> expanded = new ArrayList<Element>(elementList);
+     expanded.addAll(curPhrEl);
+     phraseElementList.add(expanded);
+   }
+   lastPhrase = true;
+ }
+
+ protected void finalizePhrase() {
+ // lazy init
+ if (phraseMap == null) {
+ phraseMap = new HashMap<String, List<List<Element>>>();
+ }
+ phraseElementInit();
+ if (phraseElementList.isEmpty()) {
+ phraseElementList.add(new ArrayList<Element>(elementList));
+ } else {
+ for (final ArrayList<Element> ph : phraseElementList) {
+ ph.addAll(new ArrayList<Element>(elementList));
+ }
+ }
+
+ phraseMap.put(phraseId, new ArrayList<List<Element>>(phraseElementList));
+ elementList.clear();
+
+ phraseElementList.clear();
+ }
+
+ protected void startPattern(final Attributes attrs) throws SAXException {
+ inPattern = true;
+ if (attrs.getValue(MARK_FROM) != null) {
+ startPositionCorrection = Integer.parseInt(attrs.getValue(MARK_FROM));
+ }
+ if (attrs.getValue(MARK_TO) != null) {
+ endPositionCorrection = Integer.parseInt(attrs.getValue(MARK_TO));
+ if (endPositionCorrection > 0) {
+ throw new SAXException("End position correction (mark_to="+ endPositionCorrection
+ + ") cannot be larger than 0: " + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ }
+ caseSensitive = YES.equals(attrs.getValue(CASE_SENSITIVE));
+ }
+
+
+ /**
+ * Shifts the match references of phrase tokens so that they still point at
+ * the intended token after the phrase has been placed inside a longer
+ * element list. For a referencing phrase token at index {@code i > 0} the
+ * token reference is increased by {@code i - 1}, both in its match object
+ * and in the {@code \n} reference embedded in its string.
+ *
+ * @param elList
+ *          Element list where the match element was used. It is changed
+ *          in place.
+ */
+ protected void processElement(final List<Element> elList) {
+   for (int position = 0; position < elList.size(); position++) {
+     final Element candidate = elList.get(position);
+     if (candidate.getPhraseName() == null || position == 0) {
+       continue;
+     }
+     if (!candidate.isReferenceElement()) {
+       continue;
+     }
+     final int oldRef = candidate.getMatch().getTokenRef();
+     final int newRef = oldRef + position - 1;
+     candidate.getMatch().setTokenRef(newRef);
+     final String shifted = candidate.getString().replace("\\" + oldRef, "\\" + newRef);
+     candidate.setStringElement(shifted);
+   }
+ }
+
+ /**
+ * Handles the start of a {@code match} element. A {@link Match} is built
+ * from the element's attributes. Inside a message it is appended to
+ * {@code suggestionMatches} and a marker ({@code \u0001} followed by a
+ * backslash and the reference number) is written into the message text;
+ * inside a token it becomes the token's backward reference.
+ *
+ * @param attrs attributes of the {@code match} element
+ * @throws SAXException if the {@code no} attribute is missing or empty
+ *           inside a message, is not a number, is smaller than 1 inside a
+ *           message, or points past the tokens parsed so far inside a token
+ */
+ protected void setMatchElement(final Attributes attrs) throws SAXException {
+   inMatch = true;
+   match = new StringBuilder();
+   Match.CaseConversion caseConversion = Match.CaseConversion.NONE;
+   final String caseConversionAttr = attrs.getValue("case_conversion");
+   if (caseConversionAttr != null) {
+     // Upper-case with a fixed locale: with the platform default locale the
+     // result depends on the user's settings (e.g. 'i' becomes a dotted
+     // capital I under a Turkish locale).
+     caseConversion = Match.CaseConversion.toCase(
+         caseConversionAttr.toUpperCase(java.util.Locale.ENGLISH));
+   }
+   Match.IncludeRange includeRange = Match.IncludeRange.NONE;
+   final String includeSkippedAttr = attrs.getValue("include_skipped");
+   if (includeSkippedAttr != null) {
+     includeRange = Match.IncludeRange.toRange(
+         includeSkippedAttr.toUpperCase(java.util.Locale.ENGLISH));
+   }
+   final Match mWorker = new Match(attrs.getValue(POSTAG),
+       attrs.getValue("postag_replace"),
+       YES.equals(attrs.getValue(POSTAG_REGEXP)),
+       attrs.getValue("regexp_match"), attrs.getValue("regexp_replace"),
+       caseConversion, YES.equals(attrs.getValue("setpos")),
+       includeRange);
+   mWorker.setInMessageOnly(!inSuggestion);
+   final String refAttr = attrs.getValue(NO);
+   if (inMessage) {
+     // Validate the reference before any state is modified.
+     if (StringTools.isEmpty(refAttr)) {
+       throw new SAXException("References cannot be empty: " + "\n Line: "
+           + pLocator.getLineNumber() + ", column: "
+           + pLocator.getColumnNumber() + ".");
+     }
+     if (parseReferenceNumber(refAttr) < 1) {
+       throw new SAXException("References must be larger than 0: "
+           + refAttr + "\n Line: " + pLocator.getLineNumber()
+           + ", column: " + pLocator.getColumnNumber() + ".");
+     }
+     if (suggestionMatches == null) {
+       suggestionMatches = new ArrayList<Match>();
+     }
+     suggestionMatches.add(mWorker);
+     // \u0001 is not a legal XML character, so it cannot occur in the
+     // message text itself and safely marks a real match element
+     message.append("\u0001\\");
+     message.append(refAttr);
+   } else if (inToken && refAttr != null) {
+     final int refNumber = parseReferenceNumber(refAttr);
+     if (refNumber > elementList.size()) {
+       throw new SAXException(
+           "Only backward references in match elements are possible, tried to specify token "
+               + refNumber
+               + "\n Line: "
+               + pLocator.getLineNumber()
+               + ", column: " + pLocator.getColumnNumber() + ".");
+     }
+     mWorker.setTokenRef(refNumber);
+     tokenReference = mWorker;
+     elements.append('\\');
+     elements.append(refNumber);
+   }
+ }
+
+ /**
+ * Parses the value of a {@code no} attribute, turning a malformed number
+ * into a SAXException that carries the current parse position.
+ */
+ private int parseReferenceNumber(final String value) throws SAXException {
+   try {
+     return Integer.parseInt(value);
+   } catch (final NumberFormatException e) {
+     throw new SAXException("References must be numbers: " + value
+         + "\n Line: " + pLocator.getLineNumber() + ", column: "
+         + pLocator.getColumnNumber() + ".", e);
+   }
+ }
+
+ protected void setExceptions(final Attributes attrs) {
+ inException = true;
+ exceptions = new StringBuilder();
+ resetException();
+
+ exceptionStringNegation = YES.equals(attrs.getValue(NEGATE));
+ exceptionValidNext = "next".equals(attrs.getValue(SCOPE));
+ exceptionValidPrev = "previous".equals(attrs.getValue(SCOPE));
+ exceptionStringInflected = YES.equals(attrs.getValue(INFLECTED));
+
+ if (attrs.getValue(POSTAG) != null) {
+ exceptionPosToken = attrs.getValue(POSTAG);
+ exceptionPosRegExp = YES.equals(attrs.getValue(POSTAG_REGEXP));
+ exceptionPosNegation = YES.equals(attrs.getValue(NEGATE_POS));
+ }
+ exceptionStringRegExp = YES.equals(attrs.getValue(REGEXP));
+ if (attrs.getValue(SPACEBEFORE) != null) {
+ exceptionSpaceBefore = YES.equals(attrs.getValue(SPACEBEFORE));
+ exceptionSpaceBeforeSet = !"ignore".equals(attrs.getValue(SPACEBEFORE));
+ }
+ }
+
+ protected void finalizeExceptions() {
+ inException = false;
+ if (!exceptionSet) {
+ tokenElement = new Element(StringTools.trimWhitespace(elements
+ .toString()), caseSensitive, regExpression, tokenInflected);
+ exceptionSet = true;
+ }
+ tokenElement.setNegation(tokenNegated);
+ if (!StringTools.isEmpty(exceptions.toString())) {
+ tokenElement.setStringException(StringTools.trimWhitespace(exceptions
+ .toString()), exceptionStringRegExp, exceptionStringInflected,
+ exceptionStringNegation, exceptionValidNext, exceptionValidPrev);
+ }
+ if (exceptionPosToken != null) {
+ tokenElement.setPosException(exceptionPosToken, exceptionPosRegExp,
+ exceptionPosNegation, exceptionValidNext, exceptionValidPrev);
+ exceptionPosToken = null;
+ }
+ if (exceptionSpaceBeforeSet) {
+ tokenElement.setExceptionSpaceBefore(exceptionSpaceBefore);
+ }
+ resetException();
+ }
+
+ protected void setToken(final Attributes attrs) {
+ inToken = true;
+
+ if (lastPhrase) {
+ elementList.clear();
+ }
+
+ lastPhrase = false;
+ tokenNegated = YES.equals(attrs.getValue(NEGATE));
+ tokenInflected = YES.equals(attrs.getValue(INFLECTED));
+ if (attrs.getValue("skip") != null) {
+ skipPos = Integer.parseInt(attrs.getValue("skip"));
+ }
+ elements = new StringBuilder();
+ // POSElement creation
+ if (attrs.getValue(POSTAG) != null) {
+ posToken = attrs.getValue(POSTAG);
+ posRegExp = YES.equals(attrs.getValue(POSTAG_REGEXP));
+ posNegation = YES.equals(attrs.getValue(NEGATE_POS));
+ }
+ regExpression = YES.equals(attrs.getValue(REGEXP));
+
+ if (attrs.getValue(SPACEBEFORE) != null) {
+ tokenSpaceBefore = YES.equals(attrs.getValue(SPACEBEFORE));
+ tokenSpaceBeforeSet = !"ignore".equals(attrs.getValue(SPACEBEFORE));
+ }
+
+ if (!inAndGroup) {
+ tokenCounter++;
+ }
+ }
+
+ protected void checkPositions(final int add) throws SAXException {
+ if (startPositionCorrection >= tokenCounter + add) {
+ throw new SAXException(
+ "Attempt to mark a token no. ("+ startPositionCorrection +") that is outside the pattern ("
+ + tokenCounter + "). Pattern elements are numbered starting from 0!" + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ if (tokenCounter +add - endPositionCorrection < 0) {
+ throw new SAXException(
+ "Attempt to mark a token no. ("+ endPositionCorrection +") that is outside the pattern ("
+ + tokenCounter + " elements). End positions should be negative but not larger than the token count!"
+ + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ }
+
+ /**
+ * Verifies, for patterns without pending phrase element lists, that
+ * {@code mark_from} and {@code mark_to} leave at least one token marked.
+ *
+ * @throws RuntimeException if the end marker is not behind the start marker
+ */
+ protected void checkMarkPositions() {
+   if (phraseElementList != null && phraseElementList.size() != 0) {
+     return; // only checked when no phrase lists are pending
+   }
+   final int tokenCount = elementList.size();
+   final int endMarker = tokenCount + endPositionCorrection;
+   if (endMarker > startPositionCorrection) {
+     return;
+   }
+   throw new RuntimeException("Invalid combination of mark_from (" + startPositionCorrection
+       + ") and mark_to (" + endPositionCorrection + ") for rule " + id
+       + " with " + tokenCount
+       + " tokens: the error position created by mark_from and mark_to is less than one token");
+ }
+
+ /**
+ * Adds Match objects for all references to tokens
+ * (including '\1' and the like).
+ * <p>
+ * The message text is scanned for a backslash followed by a digit. A
+ * reference preceded by the {@code \u0001} marker was written by a
+ * {@code match} element: its Match is taken from {@code suggestionMatches}
+ * and the marker is removed from {@code message}. Any other reference is a
+ * plain-text one and gets a default Match that is used in the message only.
+ *
+ * @return the Match objects in the order of their references in the
+ *         message; {@code suggestionMatches} itself if the message holds
+ *         no reference; {@code null} if there are no suggestion matches
+ */
+ protected List<Match> addLegacyMatches() {
+   if (suggestionMatches == null || suggestionMatches.isEmpty()) {
+     return null;
+   }
+   final List<Match> sugMatch = new ArrayList<Match>();
+   final String messageStr = message.toString();
+   int matchCounter = 0;
+   // Start at index 0 so that a reference at the very beginning of the
+   // message is not skipped.
+   int pos = messageStr.indexOf('\\');
+   while (pos != -1) {
+     // A backslash that ends the message has no following character.
+     if (pos + 1 < messageStr.length()
+         && Character.isDigit(messageStr.charAt(pos + 1))) {
+       if (pos > 0 && messageStr.charAt(pos - 1) == '\u0001') {
+         // real suggestion marker
+         sugMatch.add(suggestionMatches.get(matchCounter));
+         // messageStr is a snapshot; one marker has already been removed
+         // from message for every match handled so far
+         message.deleteCharAt(pos - 1 - matchCounter);
+         matchCounter++;
+       } else {
+         final Match mWorker = new Match(null, null, false, null,
+             null, Match.CaseConversion.NONE, false, Match.IncludeRange.NONE);
+         mWorker.setInMessageOnly(true);
+         sugMatch.add(mWorker);
+       }
+     }
+     pos = messageStr.indexOf('\\', pos + 1);
+   }
+   if (sugMatch.isEmpty()) {
+     return suggestionMatches;
+   }
+   return sugMatch;
+ }
+
+ /**
+ * Handles the end of a token element: completes the token's Element from
+ * the state gathered while parsing it, stores it (in the element list, or
+ * as an and-group member of the previous element) and resets the token
+ * state.
+ */
+ protected void finalizeTokens() {
+ // An element already exists if an exception inside this token created it
+ // (see finalizeExceptions()); in that case only its string is refreshed.
+ if (!exceptionSet || tokenElement == null) {
+ tokenElement = new Element(StringTools.trimWhitespace(elements
+ .toString()), caseSensitive, regExpression, tokenInflected);
+ tokenElement.setNegation(tokenNegated);
+ } else {
+ tokenElement.setStringElement(StringTools.trimWhitespace(elements
+ .toString()));
+ }
+
+ // skipPos and posToken are consumed here so they do not carry over to
+ // the next token
+ if (skipPos != 0) {
+ tokenElement.setSkipNext(skipPos);
+ skipPos = 0;
+ }
+ if (posToken != null) {
+ tokenElement.setPosElement(posToken, posRegExp, posNegation);
+ posToken = null;
+ }
+
+ if (tokenReference != null) {
+ tokenElement.setMatch(tokenReference);
+ }
+
+ // The first token of an and-group is a regular list entry; the following
+ // ones are attached to that entry.
+ if (inAndGroup && andGroupCounter > 0) {
+ elementList.get(elementList.size() - 1)
+ .setAndGroupElement(tokenElement);
+ } else {
+ elementList.add(tokenElement);
+ }
+ if (inAndGroup) {
+ andGroupCounter++;
+ }
+
+ if (inUnification) {
+ tokenElement.setUnification(equivalenceFeatures);
+ if (uniNegation) {
+ tokenElement.setUniNegation();
+ }
+ }
+
+ // Inside a unification definition the token is registered with the
+ // language's unifier and is not kept in the element list.
+ if (inUnificationDef) {
+ language.getUnifier().setEquivalence(uFeature, uType, tokenElement);
+ elementList.clear();
+ }
+ if (tokenSpaceBeforeSet) {
+ tokenElement.setWhitespaceBefore(tokenSpaceBefore);
+ }
+ resetToken();
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRule.java
new file mode 100644
index 0000000..1d42a17
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRule.java
@@ -0,0 +1,93 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.patterns.bitext;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.bitext.BitextRule;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+
+/**
+ * A pattern rule for bitext, i.e. for a pair of source and target
+ * sentences. It combines two {@link PatternRule}s: the target rule is only
+ * applied when the source rule has matched the source sentence (see
+ * {@link Rule#match} for the single-sentence contract). The rules use the
+ * syntax of XML files similar to normal PatternRules.
+ *
+ * @author Marcin Miłkowski
+ */
+public class BitextPatternRule extends BitextRule {
+
+  private final PatternRule srcRule;
+  private final PatternRule trgRule;
+
+  /**
+   * @param src rule applied to the source sentence
+   * @param trg rule applied to the target sentence
+   */
+  BitextPatternRule(final PatternRule src, final PatternRule trg) {
+    this.srcRule = src;
+    this.trgRule = trg;
+  }
+
+  /** Returns the rule that is applied to the source sentence. */
+  public PatternRule getSrcRule() {
+    return this.srcRule;
+  }
+
+  /** Returns the rule that is applied to the target sentence. */
+  public PatternRule getTrgRule() {
+    return this.trgRule;
+  }
+
+  /** Returns the description of the source rule. */
+  @Override
+  public String getDescription() {
+    return this.srcRule.getDescription();
+  }
+
+  /** Returns the message of the target rule. */
+  public String getMessage() {
+    return this.trgRule.getMessage();
+  }
+
+  /** Returns the id of the source rule. */
+  @Override
+  public String getId() {
+    return this.srcRule.getId();
+  }
+
+  /**
+   * A single sentence cannot be checked by a bitext rule, so this method
+   * always returns an empty array.
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence text) throws IOException {
+    return new RuleMatch[0];
+  }
+
+  /**
+   * Applies the target rule to the target sentence if the source rule
+   * matches the source sentence.
+   *
+   * @return the matches of the target rule, or an empty array if the source
+   *         rule did not match
+   */
+  @Override
+  public RuleMatch[] match(AnalyzedSentence sourceText,
+      AnalyzedSentence targetText) throws IOException {
+    final boolean sourceMatched = this.srcRule.match(sourceText).length > 0;
+    if (!sourceMatched) {
+      return new RuleMatch[0];
+    }
+    return this.trgRule.match(targetText);
+  }
+
+  /** Does nothing. */
+  @Override
+  public void reset() {
+    // nothing to reset
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRuleLoader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRuleLoader.java
new file mode 100644
index 0000000..508f381
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextPatternRuleLoader.java
@@ -0,0 +1,413 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns.bitext;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.bitext.StringPair;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+import de.danielnaber.languagetool.rules.bitext.IncorrectBitextExample;
+import de.danielnaber.languagetool.rules.patterns.Element;
+import de.danielnaber.languagetool.rules.patterns.Match;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+
+/**
+ * Loads {@link BitextPatternRule}s from an XML file.
+ *
+ * @author Marcin Miłkowski
+ */
+public class BitextPatternRuleLoader extends DefaultHandler {
+
+  /**
+   * Parses the given stream with a SAX parser and returns the bitext rules
+   * found in it.
+   *
+   * @param is stream with the XML rule definitions; it is not closed here
+   * @param filename name used in the error message only
+   * @return the rules collected by the handler
+   * @throws IOException if anything goes wrong while loading or parsing;
+   *           the original exception is set as its cause
+   */
+  public final List<BitextPatternRule> getRules(final InputStream is,
+      final String filename) throws IOException {
+    try {
+      final PatternRuleHandler handler = new PatternRuleHandler();
+      final SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
+      // Loading of external DTDs is left enabled. It could be disabled by
+      // setting the XMLReader feature
+      // "http://apache.org/xml/features/nonvalidating/load-external-dtd"
+      // to false.
+      saxParser.parse(is, handler);
+      return handler.getBitextRules();
+    } catch (final Exception e) {
+      final String problem = "Cannot load or parse '" + filename + "'";
+      throw (IOException) new IOException(problem).initCause(e);
+    }
+  }
+
+}
+
+class PatternRuleHandler extends BitextXMLRuleHandler {
+
+ private int subId;
+
+ private boolean defaultOff;
+ private boolean defaultOn;
+
+ private Category category;
+ private String description;
+ private String ruleGroupDescription;
+
+ private PatternRule srcRule;
+ private PatternRule trgRule;
+
+ private IncorrectExample trgExample;
+ private IncorrectExample srcExample;
+
+ private Language srcLang;
+
+ // ===========================================================
+ // SAX DocumentHandler methods
+ // ===========================================================
+
+ /**
+ * Dispatches on the name of the element that has just been opened and
+ * updates the parser state accordingly (category, target language, rule
+ * and rule group attributes, pattern/token/exception state, examples,
+ * messages, unification and phrase definitions, source language).
+ *
+ * @throws SAXException if the target language of the rule file is unknown,
+ *           or if one of the delegated handlers rejects the attributes
+ */
+ @Override
+ public void startElement(final String namespaceURI, final String lName,
+     final String qName, final Attributes attrs) throws SAXException {
+   if (qName.equals("category")) {
+     final String catName = attrs.getValue("name");
+     final String priorityStr = attrs.getValue("priority");
+     if (priorityStr != null) {
+       category = new Category(catName, Integer.parseInt(priorityStr));
+     } else {
+       category = new Category(catName);
+     }
+
+     if ("off".equals(attrs.getValue(DEFAULT))) {
+       category.setDefaultOff();
+     }
+
+   } else if (qName.equals("rules")) {
+     final String languageStr = attrs.getValue("targetLang");
+     language = Language.getLanguageForShortName(languageStr);
+     if (language == null) {
+       throw new SAXException("Unknown language '" + languageStr + "'");
+     }
+   } else if (qName.equals("rule")) {
+     id = attrs.getValue("id");
+     if (inRuleGroup) {
+       subId++;
+     }
+     // a default set on the rule group wins over the rule's own setting
+     if (!(inRuleGroup && defaultOff)) {
+       defaultOff = "off".equals(attrs.getValue(DEFAULT));
+     }
+
+     if (!(inRuleGroup && defaultOn)) {
+       defaultOn = "on".equals(attrs.getValue(DEFAULT));
+     }
+     // id and name fall back to those of the enclosing rule group
+     if (inRuleGroup && id == null) {
+       id = ruleGroupId;
+     }
+     description = attrs.getValue("name");
+     if (inRuleGroup && description == null) {
+       description = ruleGroupDescription;
+     }
+     correctExamples = new ArrayList<StringPair>();
+     incorrectExamples = new ArrayList<IncorrectBitextExample>();
+     if (suggestionMatches != null) {
+       suggestionMatches.clear();
+     }
+   } else if (PATTERN.equals(qName) || "target".equals(qName)) {
+     startPattern(attrs);
+   } else if (AND.equals(qName)) {
+     inAndGroup = true;
+   } else if (UNIFY.equals(qName)) {
+     inUnification = true;
+     uniNegation = YES.equals(attrs.getValue(NEGATE));
+   } else if (qName.equals("feature")) {
+     uFeature = attrs.getValue("id");
+   } else if (qName.equals(TYPE)) {
+     uType = attrs.getValue("id");
+     uTypeList.add(uType);
+   } else if (qName.equals(TOKEN)) {
+     setToken(attrs);
+   } else if (qName.equals(EXCEPTION)) {
+     setExceptions(attrs);
+   } else if (qName.equals(EXAMPLE)
+       // constant first: getValue() returns null when the example has no
+       // type attribute, which must not end in a NullPointerException
+       && "correct".equals(attrs.getValue(TYPE))) {
+     inCorrectExample = true;
+     correctExample = new StringBuilder();
+   } else if (EXAMPLE.equals(qName)
+       && "incorrect".equals(attrs.getValue(TYPE))) {
+     inIncorrectExample = true;
+     incorrectExample = new StringBuilder();
+     exampleCorrection = new StringBuilder();
+     if (attrs.getValue("correction") != null) {
+       exampleCorrection.append(attrs.getValue("correction"));
+     }
+   } else if (MESSAGE.equals(qName)) {
+     inMessage = true;
+     message = new StringBuilder();
+   } else if (qName.equals("short")) {
+     inShortMessage = true;
+     shortMessage = new StringBuilder();
+   } else if (qName.equals(RULEGROUP)) {
+     ruleGroupId = attrs.getValue("id");
+     ruleGroupDescription = attrs.getValue("name");
+     defaultOff = "off".equals(attrs.getValue(DEFAULT));
+     defaultOn = "on".equals(attrs.getValue(DEFAULT));
+     inRuleGroup = true;
+     subId = 0;
+   } else if (qName.equals("suggestion") && inMessage) {
+     message.append("<suggestion>");
+     inSuggestion = true;
+   } else if (qName.equals("match")) {
+     setMatchElement(attrs);
+   } else if (qName.equals(MARKER) && inCorrectExample) {
+     correctExample.append("<marker>");
+   } else if (qName.equals(MARKER) && inIncorrectExample) {
+     incorrectExample.append("<marker>");
+   } else if (qName.equals("unification")) {
+     uFeature = attrs.getValue("feature");
+     inUnificationDef = true;
+   } else if (qName.equals("equivalence")) {
+     uType = attrs.getValue(TYPE);
+   } else if (qName.equals("phrases")) {
+     inPhrases = true;
+   } else if (qName.equals("includephrases")) {
+     phraseElementInit();
+   } else if (qName.equals("phrase") && inPhrases) {
+     phraseId = attrs.getValue("id");
+   } else if (qName.equals("phraseref") && (attrs.getValue("idref") != null)) {
+     preparePhrase(attrs);
+   } else if (qName.equals("source")) {
+     srcLang = Language.getLanguageForShortName(attrs.getValue("lang"));
+   }
+ }
+
+ /**
+ * Dispatches on the name of the element that has just been closed:
+ * finishes source and target rules, assembles the bitext rule at the end
+ * of a rule element, finishes tokens, exceptions, examples, messages and
+ * phrases, and switches the corresponding state flags off again.
+ */
+ @Override
+ public void endElement(final String namespaceURI, final String sName,
+ final String qName) throws SAXException {
+
+ if (qName.equals("source")) {
+ checkMarkPositions();
+ srcRule = finalizeRule();
+ } else if ("target".equals(qName)) {
+ checkMarkPositions();
+ trgRule = finalizeRule();
+ } else if ("rule".equals(qName)) {
+ // NOTE(review): relies on a target element having been closed before,
+ // otherwise trgRule is still unset here - confirm the rule file format
+ // guarantees this.
+ trgRule.setMessage(message.toString());
+ if (suggestionMatches != null) {
+ for (final Match m : suggestionMatches) {
+ trgRule.addSuggestionMatch(m);
+ }
+ if (phraseElementList.size() <= 1) {
+ suggestionMatches.clear();
+ }
+ }
+ final BitextPatternRule bRule = new BitextPatternRule(srcRule, trgRule);
+ bRule.setCorrectBitextExamples(correctExamples);
+ bRule.setIncorrectBitextExamples(incorrectExamples);
+ bRule.setSourceLang(srcLang);
+ rules.add(bRule);
+ } else if (qName.equals(EXCEPTION)) {
+ finalizeExceptions();
+ } else if (qName.equals(AND)) {
+ inAndGroup = false;
+ andGroupCounter = 0;
+ // the whole and-group counts as one token (setToken() does not count
+ // tokens inside an and-group)
+ tokenCounter++;
+ } else if (qName.equals(TOKEN)) {
+ finalizeTokens();
+ } else if (qName.equals(PATTERN)) {
+ inPattern = false;
+ if (lastPhrase) {
+ elementList.clear();
+ }
+ if (phraseElementList == null || phraseElementList.isEmpty()) {
+ checkPositions(0);
+ } else {
+ for (List<Element> elements : phraseElementList) {
+ checkPositions(elements.size());
+ }
+ }
+ tokenCounter = 0;
+ } else if (qName.equals("trgExample")) {
+ trgExample = setExample();
+ } else if (qName.equals("srcExample")) {
+ srcExample = setExample();
+ } else if (qName.equals("example")) {
+ // Pairs the source and target example texts collected by the
+ // srcExample / trgExample elements inside this example.
+ if (inCorrectExample) {
+ correctExamples.add(new StringPair(srcExample.getExample(), trgExample.getExample()));
+ } else if (inIncorrectExample) {
+ if (trgExample.getCorrections() == null) {
+ incorrectExamples.add(
+ new IncorrectBitextExample(
+ new StringPair(
+ srcExample.getExample(), trgExample.getExample())
+ ));
+ } else {
+ List<String> l = trgExample.getCorrections();
+ String str [] = l.toArray (new String [l.size ()]);
+ incorrectExamples.add(
+ new IncorrectBitextExample(
+ new StringPair(srcExample.getExample(),
+ trgExample.getExample()), str)
+ );
+ }
+ }
+ inCorrectExample = false;
+ inIncorrectExample = false;
+ } else if (qName.equals("message")) {
+ suggestionMatches = addLegacyMatches();
+ inMessage = false;
+ } else if (qName.equals("short")) {
+ inShortMessage = false;
+ } else if (qName.equals("match")) {
+ // the text content of the match element becomes its lemma string
+ if (inMessage) {
+ suggestionMatches.get(suggestionMatches.size() - 1).setLemmaString(
+ match.toString());
+ } else if (inToken) {
+ tokenReference.setLemmaString(match.toString());
+ }
+ inMatch = false;
+ } else if (qName.equals("rulegroup")) {
+ inRuleGroup = false;
+ } else if (qName.equals("suggestion") && inMessage) {
+ message.append("</suggestion>");
+ inSuggestion = false;
+ } else if (qName.equals(MARKER) && inCorrectExample) {
+ correctExample.append("</marker>");
+ } else if (qName.equals(MARKER) && inIncorrectExample) {
+ incorrectExample.append("</marker>");
+ } else if (qName.equals("phrase") && inPhrases) {
+ finalizePhrase();
+ } else if (qName.equals("includephrases")) {
+ elementList.clear();
+ } else if (qName.equals("phrases") && inPhrases) {
+ inPhrases = false;
+ } else if (qName.equals("unification")) {
+ inUnificationDef = false;
+ } else if (qName.equals("feature")) {
+ // hand the collected types over and start a new list for the next
+ // feature
+ equivalenceFeatures.put(uFeature, uTypeList);
+ uTypeList = new ArrayList<String>();
+ } else if (qName.equals("unify")) {
+ inUnification = false;
+ //clear the features...
+ equivalenceFeatures = new HashMap<String, List<String>>();
+ }
+ }
+
+ /**
+ * Builds an example object from the example text collected so far and
+ * resets the three example buffers. For an incorrect example the
+ * corrections (separated by {@code |}) are attached if there are any.
+ *
+ * @return the example, or {@code null} when called outside of an example
+ */
+ private IncorrectExample setExample() {
+   final IncorrectExample example;
+   if (inCorrectExample) {
+     example = new IncorrectExample(correctExample.toString());
+   } else if (inIncorrectExample) {
+     final String[] corrections = exampleCorrection.toString().split("\\|");
+     final boolean hasCorrection =
+         corrections.length > 0 && corrections[0].length() > 0;
+     final String text = incorrectExample.toString();
+     example = hasCorrection
+         ? new IncorrectExample(text, corrections)
+         : new IncorrectExample(text);
+   } else {
+     example = null;
+   }
+   // fresh buffers for the next example
+   exampleCorrection = new StringBuilder();
+   incorrectExample = new StringBuilder();
+   correctExample = new StringBuilder();
+   return example;
+ }
+
+ /**
+ * Builds a PatternRule from the tokens collected for the source or target
+ * part that has just ended, then clears the token lists and the position
+ * corrections.
+ *
+ * @return the rule built from {@code elementList} when no phrase element
+ *         lists are pending; otherwise the rule built from the last
+ *         phrase element list
+ */
+ private PatternRule finalizeRule() {
+ PatternRule rule = null;
+ phraseElementInit();
+ if (phraseElementList.isEmpty()) {
+ // created with an empty message here
+ rule = new PatternRule(id, language, elementList,
+ description, "", shortMessage.toString());
+ prepareRule(rule);
+ } else {
+ if (!elementList.isEmpty()) {
+ for (final ArrayList<Element> ph : phraseElementList) {
+ ph.addAll(new ArrayList<Element>(elementList));
+ }
+ }
+
+ // NOTE(review): each iteration overwrites 'rule', so only the rule for
+ // the last phrase element list is returned. prepareRule() also zeroes
+ // the position corrections, so only the first rule built in this loop
+ // receives non-zero mark_from / mark_to values - confirm this is
+ // intended.
+ for (final ArrayList<Element> phraseElement : phraseElementList) {
+ processElement(phraseElement);
+ rule = new PatternRule(id, language, phraseElement,
+ description, message.toString(), shortMessage.toString(),
+ phraseElementList.size() > 1);
+ prepareRule(rule);
+ }
+ }
+ elementList.clear();
+ if (phraseElementList != null) {
+ phraseElementList.clear();
+ }
+ startPositionCorrection = 0;
+ endPositionCorrection = 0;
+ return rule;
+ }
+ private void prepareRule(final PatternRule rule) {
+ rule.setStartPositionCorrection(startPositionCorrection);
+ rule.setEndPositionCorrection(endPositionCorrection);
+ startPositionCorrection = 0;
+ endPositionCorrection = 0;
+ rule.setCategory(category);
+ if (inRuleGroup)
+ rule.setSubId(Integer.toString(subId));
+ else
+ rule.setSubId("1");
+ caseSensitive = false;
+ if (defaultOff) {
+ rule.setDefaultOff();
+ }
+
+ if (category.isDefaultOff() && !defaultOn) {
+ rule.setDefaultOff();
+ }
+
+ }
+
+ /**
+ * Appends character data to the buffer of the innermost construct that is
+ * currently open. The first state flag that is set, checked in this
+ * order, decides: exception, token, correct example, incorrect example,
+ * match, message, short message. Text outside of all of these is dropped.
+ */
+ @Override
+ public void characters(final char[] buf, final int offset, final int len) {
+   final String text = new String(buf, offset, len);
+   if (inException) {
+     exceptions.append(text);
+     return;
+   }
+   if (inToken) {
+     elements.append(text);
+     return;
+   }
+   if (inCorrectExample) {
+     correctExample.append(text);
+     return;
+   }
+   if (inIncorrectExample) {
+     incorrectExample.append(text);
+     return;
+   }
+   if (inMatch) {
+     match.append(text);
+     return;
+   }
+   if (inMessage) {
+     message.append(text);
+     return;
+   }
+   if (inShortMessage) {
+     shortMessage.append(text);
+   }
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextXMLRuleHandler.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextXMLRuleHandler.java
new file mode 100644
index 0000000..02f5a04
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/BitextXMLRuleHandler.java
@@ -0,0 +1,56 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns.bitext;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+import de.danielnaber.languagetool.bitext.StringPair;
+import de.danielnaber.languagetool.rules.bitext.IncorrectBitextExample;
+import de.danielnaber.languagetool.rules.patterns.XMLRuleHandler;
+
+/**
+ * XML rule handler that loads rules from XML and throws
+ * exceptions on errors and warnings.
+ *
+ * @author Daniel Naber
+ */
+class BitextXMLRuleHandler extends XMLRuleHandler {
+
+ List<BitextPatternRule> rules = new ArrayList<BitextPatternRule>();
+
+ List<StringPair> correctExamples = new ArrayList<StringPair>();
+ List<IncorrectBitextExample> incorrectExamples = new ArrayList<IncorrectBitextExample>();
+
+ List<BitextPatternRule> getBitextRules() {
+ return rules;
+ }
+
+ public void warning (final SAXParseException e) throws SAXException {
+ throw e;
+ }
+
+ public void error (final SAXParseException e) throws SAXException {
+ throw e;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/FalseFriendsAsBitextLoader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/FalseFriendsAsBitextLoader.java
new file mode 100644
index 0000000..87c30a5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/bitext/FalseFriendsAsBitextLoader.java
@@ -0,0 +1,72 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.patterns.bitext;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.FalseFriendRuleLoader;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+
+/**
+ * Loads the false friend rules as bitext pattern rules. Note that the resulting
+ * rules have suggestions that are not really customizable, in contradistinction
+ * to the 'real' bitext pattern rules.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public class FalseFriendsAsBitextLoader {
+
+ public List<BitextPatternRule> getFalseFriendsAsBitext(final String filename,
+ final Language motherTongue, final Language language) throws ParserConfigurationException, SAXException, IOException {
+ final FalseFriendRuleLoader ruleLoader = new FalseFriendRuleLoader();
+ List<BitextPatternRule> bRules = new ArrayList<BitextPatternRule>();
+ List<PatternRule> rules1 =
+ ruleLoader.getRules(this.getClass().getResourceAsStream(filename),
+ motherTongue, language);
+ List<PatternRule> rules2 =
+ ruleLoader.getRules(this.getClass().getResourceAsStream(filename),
+ language, motherTongue);
+ HashMap<String, PatternRule> srcRules = new HashMap<String, PatternRule>();
+ for (PatternRule rule : rules1) {
+ srcRules.put(rule.getId(), rule);
+ }
+ for (PatternRule rule : rules2) {
+ if (srcRules.containsKey(rule.getId())) {
+ BitextPatternRule bRule = new BitextPatternRule(
+ srcRules.get(rule.getId()), rule);
+ bRule.setSourceLang(motherTongue);
+ bRule.setCategory(rule.getCategory());
+ bRules.add(bRule);
+ }
+ }
+ return bRules;
+ }
+
+}
+
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java
new file mode 100644
index 0000000..6d2ff17
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/CompoundRule.java
@@ -0,0 +1,55 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.AbstractCompoundRule;
+
+/**
+ * Polish compound rule: checks that compounds (if in the list) are not
+ * written as separate words.
+ *
+ * @author Marcin Miłkowski, based on code by Daniel Naber
+ */
+public final class CompoundRule extends AbstractCompoundRule {
+
+  private static final String FILE_NAME = "/pl/compounds.txt";
+
+  private static final String SHORT_MESSAGE = "Brak łącznika lub zbędny łącznik";
+  // the three messages, in the order in which they are passed to setMsg()
+  private static final String MESSAGE_HYPHEN = "Ten wyraz pisze się z łącznikiem.";
+  private static final String MESSAGE_JOINED =
+      "Ten wyraz pisze się razem (bez spacji ani łącznika).";
+  private static final String MESSAGE_EITHER =
+      "Ten wyraz pisze się z łącznikiem lub bez niego.";
+
+  /**
+   * Loads the Polish compound list as UTF-8 from the resource directory and
+   * installs the Polish short message and messages.
+   *
+   * @param messages bundle passed on to the superclass constructor
+   * @throws IOException declared for loading the compound file
+   */
+  public CompoundRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    loadCompoundFile(
+        JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILE_NAME), "UTF-8");
+    setShort(SHORT_MESSAGE);
+    setMsg(MESSAGE_HYPHEN, MESSAGE_JOINED, MESSAGE_EITHER);
+  }
+
+  /** @return the fixed rule id, <code>PL_COMPOUNDS</code> */
+  public final String getId() {
+    return "PL_COMPOUNDS";
+  }
+
+  /** @return the Polish description of this rule */
+  public final String getDescription() {
+    return "Sprawdza wyrazy z łącznikiem, np. „łapu capu” zamiast „łapu-capu”";
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java
new file mode 100644
index 0000000..0a6f01b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishRule.java
@@ -0,0 +1,31 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for Polish rules. It declares no members of its own and
+ * only gives Polish-specific rules a common supertype below {@link Rule}.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public abstract class PolishRule extends Rule {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java
new file mode 100644
index 0000000..3b83133
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishUnpairedBracketsRule.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.pl;
+
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.GenericUnpairedBracketsRule;
+
+/**
+ * Unpaired brackets rule set up with the bracket and quotation mark symbols
+ * used for Polish.
+ */
+public class PolishUnpairedBracketsRule extends GenericUnpairedBracketsRule {
+
+  // Both arrays list corresponding symbols at the same index.
+  private static final String[] OPENING_SYMBOLS = { "[", "(", "{", "„", "»", "\"" };
+  private static final String[] CLOSING_SYMBOLS = { "]", ")", "}", "”", "«", "\"" };
+
+  /**
+   * Creates the rule and replaces the inherited symbol tables with the
+   * Polish ones.
+   *
+   * @param messages bundle passed on to the superclass constructor
+   * @param language language passed on to the superclass constructor
+   */
+  public PolishUnpairedBracketsRule(final ResourceBundle messages,
+      final Language language) {
+    super(messages, language);
+    endSymbols = CLOSING_SYMBOLS;
+    startSymbols = OPENING_SYMBOLS;
+  }
+
+  /** @return the fixed rule id, <code>PL_UNPAIRED_BRACKETS</code> */
+  public String getId() {
+    return "PL_UNPAIRED_BRACKETS";
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java
new file mode 100644
index 0000000..a7dbb5e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/PolishWordRepeatRule.java
@@ -0,0 +1,200 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Rule for detecting the same word used more than once in a sentence, not
+ * only when the occurrences directly follow each other. Words are compared by
+ * lemma when every reading of the token has a POS tag and a lemma, otherwise
+ * by the token text.
+ *
+ * @author Marcin Miłkowski
+ */
+public class PolishWordRepeatRule extends PolishRule {
+
+ /**
+ * Excluded dictionary words. In match() the pattern has to match a
+ * lemma as a whole for the token to be skipped.
+ */
+ private static final Pattern EXC_WORDS = Pattern
+ .compile("nie|tuż|aż|to|siebie|być|ani|ni|albo|"
+ + "lub|czy|bądź|jako|zł|np|coraz"
+ + "|bardzo|bardziej|proc|ten|jak|mln|tys|swój|mój|"
+ + "twój|nasz|wasz|i|zbyt");
+
+ /**
+ * Excluded part of speech classes, matched against the complete POS tag.
+ */
+ private static final Pattern EXC_POS = Pattern.compile("prep:.*|ppron.*");
+
+ /**
+ * Excluded non-words (special symbols, Roman numerals etc.), matched
+ * against the complete token text: the entity names quot, gt, lt and amp
+ * with a leading ampersand, anything starting with a digit, and Roman
+ * numerals.
+ */
+ private static final Pattern EXC_NONWORDS = Pattern
+ .compile("&quot|&gt|&lt|&amp|[0-9].*|"
+ + "M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])$");
+
+ /**
+ * Creates the rule. When a message bundle is given, the category is built
+ * from its "category_misc" entry. The constructor also calls
+ * setDefaultOff() (inherited; presumably leaves the rule switched off
+ * unless it is enabled explicitly).
+ *
+ * @param messages the message bundle, may be null
+ */
+ public PolishWordRepeatRule(final ResourceBundle messages) {
+ if (messages != null) {
+ super.setCategory(new Category(messages.getString("category_misc")));
+ }
+ setDefaultOff();
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see de.danielnaber.languagetool.rules.Rule#getId()
+ */
+ @Override
+ public final String getId() {
+ return "PL_WORD_REPEAT";
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see de.danielnaber.languagetool.rules.Rule#getDescription()
+ */
+ @Override
+ public final String getDescription() {
+ return "Powtórzenia wyrazów w zdaniu (monotonia stylistyczna)";
+ }
+
+ /*
+ * Tests if any word form is repeated in the sentence.
+ *
+ * Walks over the non-whitespace tokens, decides for each token whether it
+ * counts as a word, and records its lemmas (or its text) in a set. A token
+ * whose lemma or text is already in the set produces one rule match that
+ * covers the token.
+ */
+ @Override
+ public final RuleMatch[] match(final AnalyzedSentence text) {
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+ boolean repetition = false;
+ // lemmas / token texts recorded so far in this sentence
+ final TreeSet<String> inflectedWords = new TreeSet<String>();
+ String prevLemma, curLemma;
+ // start from real token, 0 = SENT_START
+ for (int i = 1; i < tokens.length; i++) {
+ final String token = tokens[i].getToken();
+ // avoid "..." etc. to be matched:
+ boolean isWord = true;
+ boolean hasLemma = true;
+
+ // tokens of fewer than two characters never count as words
+ if (token.length() < 2) {
+ isWord = false;
+ }
+
+ // first pass over the readings: an empty POS tag, an excluded lemma or
+ // an excluded POS tag drops the token; a missing POS tag or lemma only
+ // switches the later comparison from lemmas to the token text
+ final int readingsLen = tokens[i].getReadingsLength();
+ for (int k = 0; k < readingsLen; k++) {
+ final String posTag = tokens[i].getAnalyzedToken(k).getPOSTag();
+ if (posTag != null) {
+ if (StringTools.isEmpty(posTag)) {
+ isWord = false;
+ break;
+ }
+ // FIXME: too many false alarms here:
+ final String lemma = tokens[i].getAnalyzedToken(k).getLemma();
+ if (lemma == null) {
+ hasLemma = false;
+ break;
+ }
+ final Matcher m1 = EXC_WORDS.matcher(lemma);
+ if (m1.matches()) {
+ isWord = false;
+ break;
+ }
+
+ final Matcher m2 = EXC_POS.matcher(posTag);
+ if (m2.matches()) {
+ isWord = false;
+ break;
+ }
+ } else {
+ hasLemma = false;
+ }
+
+ }
+
+ final Matcher m1 = EXC_NONWORDS.matcher(tokens[i].getToken());
+ if (m1.matches()) {
+ isWord = false;
+ }
+
+ prevLemma = "";
+ if (isWord) {
+ // NOTE(review): despite its name, this flag becomes true once a
+ // reading tagged SENT_END has been seen and then stays true
+ boolean notSentEnd = false;
+ for (int j = 0; j < readingsLen; j++) {
+ final String pos = tokens[i].getAnalyzedToken(j).getPOSTag();
+ if (pos != null) {
+ notSentEnd |= "SENT_END".equals(pos);
+ }
+ if (hasLemma) {
+ curLemma = tokens[i].getAnalyzedToken(j).getLemma();
+ // prevLemma only skips a lemma equal to that of the preceding reading
+ if (!prevLemma.equals(curLemma) && !notSentEnd) {
+ if (inflectedWords.contains(curLemma)) {
+ repetition = true;
+ } else {
+ inflectedWords.add(tokens[i].getAnalyzedToken(j).getLemma());
+ }
+ }
+ prevLemma = curLemma;
+ } else {
+ // NOTE(review): this branch runs once per reading, so for a token
+ // with several readings the text added on one pass is found again
+ // on the next one unless a SENT_END reading was seen in between -
+ // looks like a word can be reported against itself; confirm
+ if (inflectedWords.contains(tokens[i].getToken()) && !notSentEnd) {
+ repetition = true;
+ } else {
+ inflectedWords.add(tokens[i].getToken());
+ }
+ }
+
+ }
+ }
+
+ if (repetition) {
+ final String msg = "Powtórzony wyraz w zdaniu";
+ final int pos = tokens[i].getStartPos();
+ // the match spans exactly the current token
+ final RuleMatch ruleMatch = new RuleMatch(this, pos, pos
+ + token.length(), msg, "Powtórzenie wyrazu");
+ ruleMatches.add(ruleMatch);
+ // the flag is per token, clear it for the next one
+ repetition = false;
+ }
+
+ }
+ return toRuleMatchArray(ruleMatches);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * This class has no instance fields and match() works on local variables
+ * only, so there is nothing to reset.
+ *
+ * @see de.danielnaber.languagetool.rules.Rule#reset()
+ */
+ @Override
+ public void reset() {
+ // nothing
+
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java
new file mode 100644
index 0000000..90708d9
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/pl/SimpleReplaceRule.java
@@ -0,0 +1,82 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.pl;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.rules.AbstractSimpleReplaceRule;
+
+/**
+ * Polish replacement rule: matches words or phrases which should not be used
+ * and suggests correct ones instead. The word list is the
+ * <code>/pl/replace.txt</code> data file named by {@link #getFileName()}
+ * (documented so far as <code>rules/pl/replace.txt</code>).
+ *
+ * @author Marcin Miłkowski
+ */
+public class SimpleReplaceRule extends AbstractSimpleReplaceRule {
+
+  public static final String POLISH_SIMPLE_REPLACE_RULE = "PL_SIMPLE_REPLACE";
+
+  // locale used on case-conversion
+  private static final Locale PL_LOCALE = new Locale("pl");
+  private static final String FILE_NAME = "/pl/replace.txt";
+
+  /**
+   * @param messages bundle passed on to the superclass constructor
+   * @throws IOException declared by the superclass constructor
+   */
+  public SimpleReplaceRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+  }
+
+  /** @return the name of the Polish word list file */
+  public final String getFileName() {
+    return FILE_NAME;
+  }
+
+  /** @return the rule id, {@link #POLISH_SIMPLE_REPLACE_RULE} */
+  public final String getId() {
+    return POLISH_SIMPLE_REPLACE_RULE;
+  }
+
+  /** @return the Polish description of this rule */
+  public String getDescription() {
+    return "Typowe literówki";
+  }
+
+  /** @return the Polish short message of this rule */
+  public String getShort() {
+    return "Literówka";
+  }
+
+  /** @return the Polish message fragment that introduces the correction */
+  public String getSuggestion() {
+    return " to typowa literówka, poprawnie: ";
+  }
+
+  /**
+   * Matching is case-insensitive for Polish.
+   *
+   * @return always <code>false</code>
+   */
+  public boolean isCaseSensitive() {
+    return false;
+  }
+
+  /** @return the Polish locale, used on case-conversion */
+  public Locale getLocale() {
+    return PL_LOCALE;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/CompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/CompoundRule.java
new file mode 100644
index 0000000..bb9dea8
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/CompoundRule.java
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ro;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.AbstractCompoundRule;
+
+/**
+ * Romanian compound rule: checks that compounds (if in the list) are not
+ * written as separate words.
+ *
+ * @author Ionuț Păduraru, based on code by Daniel Naber
+ */
+public class CompoundRule extends AbstractCompoundRule {
+
+  public static final String ROMANIAN_COMPOUND_RULE = "RO_COMPOUND";
+  private static final String FILE_NAME = "/ro/compounds.txt";
+
+  private static final String SHORT_MESSAGE = "Problemă de scriere (cratimă, spațiu, etc.)";
+  // the three messages, in the order in which they are passed to setMsg()
+  private static final String MESSAGE_HYPHEN = "Cuvântul se scrie cu cratimă.";
+  private static final String MESSAGE_JOINED = "Cuvântul se scrie legat.";
+  private static final String MESSAGE_EITHER = "Cuvântul se scrie legat sau cu cratimă.";
+
+  /**
+   * Loads the Romanian compound list as UTF-8 from the rules directory,
+   * installs the Romanian messages and adjusts two inherited settings.
+   *
+   * @param messages bundle passed on to the superclass constructor
+   * @throws IOException declared for loading the compound file
+   */
+  public CompoundRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    loadCompoundFile(
+        JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILE_NAME), "UTF-8");
+    super.setShort(SHORT_MESSAGE);
+    super.setMsg(MESSAGE_HYPHEN, MESSAGE_JOINED, MESSAGE_EITHER);
+    // default value (2) is not ok for Romanian
+    setMaxUnHyphenatedWordCount(Integer.MAX_VALUE);
+    // there are words that should not be written with hyphen but as one word
+    setHyphenIgnored(false);
+  }
+
+  /** @return the rule id, {@link #ROMANIAN_COMPOUND_RULE} */
+  public String getId() {
+    return ROMANIAN_COMPOUND_RULE;
+  }
+
+  /** @return the Romanian description of this rule */
+  public String getDescription() {
+    return "Greșeală de scriere (cuvinte scrise legat sau cu cratimă)";
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/SimpleReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/SimpleReplaceRule.java
new file mode 100644
index 0000000..9e96513
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ro/SimpleReplaceRule.java
@@ -0,0 +1,264 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ro;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Queue;
+import java.util.ResourceBundle;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.AbstractSimpleReplaceRule;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A rule that matches words which should not be used and suggests correct ones instead.
+ * Romanian implementation; loads the list of words from <code>/ro/replace.txt</code>.
+ * <p>
+ * Unlike AbstractSimpleReplaceRule, supports multiple words (Ex: "aqua forte" => "acvaforte").
+ * <p>
+ * Each data line has the form <code>wrong1|wrong2=correct1|correct2</code>; empty lines and
+ * lines starting with <code>#</code> are ignored.
+ * <p>
+ * Note: Merge this into {@link AbstractSimpleReplaceRule} eventually and simply extend from
+ * AbstractSimpleReplaceRule.
+ *
+ * @author Ionuț Păduraru
+ * @version $Id$
+ */
+public class SimpleReplaceRule extends Rule {
+
+  public static final String ROMANIAN_SIMPLE_REPLACE_RULE = "RO_SIMPLE_REPLACE";
+
+  private static final String FILE_NAME = "/ro/replace.txt";
+  private static final String FILE_ENCODING = "utf-8";
+  // locale used on case-conversion
+  private static final Locale RO_LOCALE = new Locale("ro");
+
+  // list of maps containing error-corrections pairs.
+  // the n-th map contains key strings of (n+1) words
+  private final List<Map<String, String>> wrongWords;
+
+  /** @return the name of the Romanian word list file */
+  public final String getFileName() {
+    return FILE_NAME;
+  }
+
+  /**
+   * Creates the rule and loads the word list from the rules directory.
+   *
+   * @param messages bundle passed on to the superclass constructor; when it is not null the
+   *          category is built from its "category_misc" entry
+   * @throws IOException when the word list cannot be read or contains a malformed line
+   */
+  public SimpleReplaceRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+    wrongWords = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(getFileName()));
+  }
+
+  /** @return the rule id, {@link #ROMANIAN_SIMPLE_REPLACE_RULE} */
+  @Override
+  public final String getId() {
+    return ROMANIAN_SIMPLE_REPLACE_RULE;
+  }
+
+  /** @return the Romanian description of this rule */
+  @Override
+  public String getDescription() {
+    return "Cuvinte sau grupuri de cuvinte incorecte sau ieșite din uz";
+  }
+
+  /** @return the Romanian short message attached to every match */
+  public String getShort() {
+    return "Cuvânt incorect sau ieșit din uz";
+  }
+
+  /** @return the text placed between the matched words and the suggestions in the message */
+  public String getSuggestion() {
+    return " este incorect sau ieșit din uz, folosiți ";
+  }
+
+  /**
+   * @return the word used to separate multiple suggestions; used only before last suggestion,
+   *         the rest are comma-separated.
+   */
+  public String getSuggestionsSeparator() {
+    return " sau ";
+  }
+
+  /**
+   * use case-insensitive matching.
+   */
+  public boolean isCaseSensitive() {
+    return false;
+  }
+
+  /**
+   * locale used on case-conversion
+   */
+  public Locale getLocale() {
+    return RO_LOCALE;
+  }
+
+  /** @return the encoding used to read the word list file */
+  public String getEncoding() {
+    return FILE_ENCODING;
+  }
+
+  /**
+   * @return the word tokenizer used for tokenization on loading words.
+   */
+  protected Tokenizer getWordTokenizer() {
+    return Language.ROMANIAN.getWordTokenizer();
+  }
+
+  /**
+   * @return the list of wrong words for which this rule can suggest correction. The list cannot
+   *         be modified.
+   */
+  public List<Map<String, String>> getWrongWords() {
+    return wrongWords;
+  }
+
+  /**
+   * Load the list of words.
+   * Same as {@link AbstractSimpleReplaceRule#loadWords} but allows multiple words.
+   *
+   * @param file the file to load; it is closed before this method returns.
+   * @return the list of maps containing the error-corrections pairs. The n-th map contains key
+   *         strings of (n+1) words.
+   * @throws IOException when the file cannot be read, when a line does not consist of exactly
+   *           two parts separated by <code>=</code>, or when a wrong form contains no word.
+   * @see #getWordTokenizer
+   */
+  private List<Map<String, String>> loadWords(final InputStream file)
+      throws IOException {
+    final List<Map<String, String>> list = new ArrayList<Map<String, String>>();
+    BufferedReader br = null;
+    try {
+      br = new BufferedReader(new InputStreamReader(file, getEncoding()));
+      String line;
+      while ((line = br.readLine()) != null) {
+        line = line.trim();
+        if (line.length() < 1 || line.charAt(0) == '#') { // ignore comments
+          continue;
+        }
+        final String[] parts = line.split("=");
+        if (parts.length != 2) {
+          throw formatError(line);
+        }
+        final String[] wrongForms = parts[0].split("\\|"); // multiple incorrect forms
+        for (final String wrongForm : wrongForms) {
+          int wordCount = 0;
+          for (final String token : getWordTokenizer().tokenize(wrongForm)) {
+            if (!StringTools.isWhitespace(token)) {
+              wordCount++;
+            }
+          }
+          if (wordCount == 0) {
+            // an empty wrong form (e.g. "=x" or "a||b=x") has no map to go into
+            throw formatError(line);
+          }
+          // grow if necessary
+          for (int i = list.size(); i < wordCount; i++) {
+            list.add(new HashMap<String, String>());
+          }
+          list.get(wordCount - 1).put(wrongForm, parts[1]);
+        }
+      }
+    } finally {
+      if (br != null) {
+        br.close(); // also closes the wrapped stream
+      } else if (file != null) {
+        file.close(); // the reader could not be created, close the raw stream instead
+      }
+    }
+    // seal the result (prevent modification from outside this class)
+    final List<Map<String, String>> result = new ArrayList<Map<String, String>>();
+    for (final Map<String, String> map : list) {
+      result.add(Collections.unmodifiableMap(map));
+    }
+    return Collections.unmodifiableList(result);
+  }
+
+  /** Builds the exception reported for a malformed line of the word list. */
+  private IOException formatError(final String line) {
+    return new IOException("Format error in file "
+        + JLanguageTool.getDataBroker().getFromRulesDirAsUrl(getFileName())
+        + ", line: " + line);
+  }
+
+  /** Appends the token to the queue, dropping the oldest entry first when the queue is full. */
+  private void addToQueue(final AnalyzedTokenReadings token,
+      final Queue<AnalyzedTokenReadings> prevTokens) {
+    final boolean inserted = prevTokens.offer(token);
+    if (!inserted) {
+      prevTokens.poll();
+      prevTokens.offer(token);
+    }
+  }
+
+  /**
+   * Looks for listed wrong words or word groups in the sentence. For every token, the token
+   * sequences ending at that token are tried, longest first, and at most one match is reported.
+   *
+   * @param text the sentence to check
+   * @return the matches found, empty when the word list is empty
+   */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    if (wrongWords.isEmpty()) {
+      // nothing to look for; ArrayBlockingQueue also rejects a capacity of 0
+      return toRuleMatchArray(ruleMatches);
+    }
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    // sliding window over the last tokens, as wide as the longest listed word group
+    final Queue<AnalyzedTokenReadings> prevTokens =
+        new ArrayBlockingQueue<AnalyzedTokenReadings>(wrongWords.size());
+
+    // start from real token, 0 is skipped as in the other rules
+    for (int i = 1; i < tokens.length; i++) {
+      addToQueue(tokens[i], prevTokens);
+      final List<AnalyzedTokenReadings> window = new ArrayList<AnalyzedTokenReadings>(prevTokens);
+      final List<String> variants = buildVariants(window);
+      final int len = variants.size(); // window and variants have the same length
+      for (int j = 0; j < len; j++) { // longest words first
+        final String crt = variants.get(j);
+        final int crtWordCount = len - j;
+        final String key = isCaseSensitive() ? crt : crt.toLowerCase(getLocale());
+        final String crtMatch = wrongWords.get(crtWordCount - 1).get(key);
+        if (crtMatch != null) {
+          ruleMatches.add(createRuleMatch(crt, crtMatch, window.get(j), window.get(len - 1)));
+          break;
+        }
+      }
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Joins the window tokens into candidate strings that all end at the last token; entry j
+   * starts at window token j, so the longest candidate comes first.
+   */
+  private List<String> buildVariants(final List<AnalyzedTokenReadings> window) {
+    final StringBuilder sb = new StringBuilder();
+    final List<String> variants = new ArrayList<String>();
+    final int last = window.size() - 1;
+    for (int j = last; j >= 0; j--) {
+      if (j != last && window.get(j + 1).isWhitespaceBefore()) {
+        sb.insert(0, " ");
+      }
+      sb.insert(0, window.get(j).getToken());
+      variants.add(0, sb.toString());
+    }
+    return variants;
+  }
+
+  /**
+   * Creates the match for one hit. The message quotes the replacements as listed in the file;
+   * the suggested replacements get an uppercase first letter when matching is case-insensitive
+   * and the matched text starts with an uppercase letter.
+   */
+  private RuleMatch createRuleMatch(final String matchedText, final String correction,
+      final AnalyzedTokenReadings firstToken, final AnalyzedTokenReadings lastToken) {
+    final List<String> replacements = Arrays.asList(correction.split("\\|"));
+    final StringBuilder msg = new StringBuilder(matchedText).append(getSuggestion());
+    for (int k = 0; k < replacements.size(); k++) {
+      if (k > 0) {
+        msg.append(k == replacements.size() - 1 ? getSuggestionsSeparator() : ", ");
+      }
+      msg.append("<suggestion>").append(replacements.get(k)).append("</suggestion>");
+    }
+    final int startPos = firstToken.getStartPos();
+    final int endPos = lastToken.getStartPos() + lastToken.getToken().length();
+    final RuleMatch ruleMatch = new RuleMatch(this, startPos, endPos, msg.toString(), getShort());
+
+    if (!isCaseSensitive() && StringTools.startsWithUppercase(matchedText)) {
+      for (int k = 0; k < replacements.size(); k++) {
+        replacements.set(k, StringTools.uppercaseFirstChar(replacements.get(k)));
+      }
+    }
+    ruleMatch.setSuggestedReplacements(replacements);
+    return ruleMatch;
+  }
+
+  /** Nothing to reset: match() keeps its working data in local variables. */
+  @Override
+  public void reset() {
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRule.java
new file mode 100644
index 0000000..4076a9c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RuSimpleReplaceRule.java
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ru;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.rules.AbstractSimpleReplaceRule;
+
+/**
+ * Russian replacement rule: matches words or phrases which should not be used
+ * and suggests correct ones instead. The word list is the
+ * <code>/ru/replace.txt</code> data file named by {@link #getFileName()}
+ * (documented so far as <code>rules/ru/replace.txt</code>).
+ *
+ * @author Yakov Reztsov
+ */
+public class RuSimpleReplaceRule extends AbstractSimpleReplaceRule {
+
+  // locale used on case-conversion
+  private static final Locale RU_LOCALE = new Locale("ru");
+
+  private static final String FILE_NAME = "/ru/replace.txt";
+
+  /**
+   * @param messages bundle passed on to the superclass constructor
+   * @throws IOException declared by the superclass constructor
+   */
+  public RuSimpleReplaceRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+  }
+
+  /** @return the name of the Russian word list file */
+  public final String getFileName() {
+    return FILE_NAME;
+  }
+
+  /** @return the fixed rule id, <code>RU_SIMPLE_REPLACE</code> */
+  public final String getId() {
+    return "RU_SIMPLE_REPLACE";
+  }
+
+  /** @return the Russian description of this rule */
+  public String getDescription() {
+    return "Поиск ошибочных слов/фраз";
+  }
+
+  /** @return the Russian short message of this rule */
+  public String getShort() {
+    return "Ошибка?";
+  }
+
+  /** @return the Russian message fragment that introduces the correction */
+  public String getSuggestion() {
+    return " - ошибочное слово/фраза, исправление: ";
+  }
+
+  /**
+   * Matching is case-insensitive for Russian.
+   *
+   * @return always <code>false</code>
+   */
+  public boolean isCaseSensitive() {
+    return false;
+  }
+
+  /** @return the Russian locale, used on case-conversion */
+  public Locale getLocale() {
+    return RU_LOCALE;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianCompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianCompoundRule.java
new file mode 100644
index 0000000..3e7d889
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianCompoundRule.java
@@ -0,0 +1,57 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ru;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.AbstractCompoundRule;
+
+/**
+ * Russian compounds rule: checks that compounds (if in the list) are not
+ * written as separate words. Based on the German compounds rule by
+ * Daniel Naber.
+ *
+ * @author Yakov Reztsov
+ * @author Daniel Naber
+ */
+public class RussianCompoundRule extends AbstractCompoundRule {
+
+  private static final String FILE_NAME = "/ru/compounds_ru.txt";
+
+  // the three messages, in the order in which they are passed to setMsg()
+  private static final String MESSAGE_HYPHEN = "Эти слова должны быть написаны через дефис.";
+  private static final String MESSAGE_JOINED = "Эти слова должны быть написаны слитно.";
+  private static final String MESSAGE_EITHER =
+      "Эти слова могут быть написаны через дефис или слитно.";
+
+  /**
+   * Loads the Russian compound list as UTF-8 from the resource directory and
+   * installs the Russian messages.
+   *
+   * @param messages bundle passed on to the superclass constructor
+   * @throws IOException declared for loading the compound file
+   */
+  public RussianCompoundRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    loadCompoundFile(
+        JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILE_NAME), "UTF-8");
+    super.setMsg(MESSAGE_HYPHEN, MESSAGE_JOINED, MESSAGE_EITHER);
+  }
+
+  /** @return the fixed rule id, <code>RU_COMPOUNDS</code> */
+  public String getId() {
+    return "RU_COMPOUNDS";
+  }
+
+  /** @return the Russian description of this rule */
+  public String getDescription() {
+    return "Правописание через дефис";
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianRule.java
new file mode 100644
index 0000000..030abf2
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianRule.java
@@ -0,0 +1,30 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.ru;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for rules for the Russian language. It declares no
+ * members of its own; everything is inherited from {@link Rule}.
+ *
+ * @author
+ */
+public abstract class RussianRule extends Rule {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRule.java
new file mode 100644
index 0000000..75dd86b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/ru/RussianUnpairedBracketsRule.java
@@ -0,0 +1,62 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.ru;
+
+import java.util.ResourceBundle;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.GenericUnpairedBracketsRule;
+
+public class RussianUnpairedBracketsRule extends GenericUnpairedBracketsRule {
+
+  // Opening and closing symbols, listed in the same order ("[" with "]", ...).
+  private static final String[] RU_START_SYMBOLS = { "[", "(", "{", "„", "«", "\"", "'" };
+  private static final String[] RU_END_SYMBOLS = { "]", ")", "}", "“", "»", "\"", "'" };
+
+  /**
+   * Matches the label of a list bullet: one or two digits optionally followed
+   * by Cyrillic letters ("1", "1а"), or one or two Cyrillic letters ("а", "ДД").
+   * The "u" flag is needed next to "i": case-insensitive matching on its own
+   * only folds US-ASCII, which left labels such as "1А" unmatched.
+   */
+  private static final Pattern NUMERALS_RU = Pattern
+      .compile("(?iu)\\d{1,2}?[а-я]*|[а-я]|[А-Я]|[а-я][а-я]|[А-Я][А-Я]");
+
+  /**
+   * Tells whether the symbol at hand is a regular bracket or an exception.
+   * The exception handled here is the Russian list bullet: а), б), Д), ДД),
+   * аа) and 1а).
+   *
+   * @param token the current token text (not used by this implementation)
+   * @param tokens all tokens of the sentence
+   * @param i index of the current token in {@code tokens}
+   * @param j index of the current symbol in the end-symbol array
+   * @param precSpace not used by this implementation
+   * @param follSpace not used by this implementation
+   * @return {@code false} if the symbol is a ")" that directly follows a
+   *         bullet label while no "(" is on top of the symbol stack,
+   *         {@code true} otherwise
+   */
+  protected boolean isNoException(final String token,
+      final AnalyzedTokenReadings[] tokens, final int i, final int j,
+      final boolean precSpace,
+      final boolean follSpace) {
+    final boolean closesBulletLabel = i > 1 && endSymbols[j].equals(")")
+        && NUMERALS_RU.matcher(tokens[i - 1].getToken()).matches();
+    if (!closesBulletLabel) {
+      return true;
+    }
+    // A pending "(" means this ")" really closes a parenthesis, e.g. "(а)",
+    // so it is not treated as a bullet.
+    return !symbolStack.empty() && "(".equals(symbolStack.peek().symbol);
+  }
+
+  /**
+   * Creates the rule and installs the Russian start and end symbols.
+   *
+   * @param messages resource bundle handed on to the superclass
+   * @param language language handed on to the superclass
+   */
+  public RussianUnpairedBracketsRule(final ResourceBundle messages,
+      final Language language) {
+    super(messages, language);
+    startSymbols = RU_START_SYMBOLS;
+    endSymbols = RU_END_SYMBOLS;
+  }
+
+  /** @return the rule identifier, "RU_UNPAIRED_BRACKETS" */
+  public String getId() {
+    return "RU_UNPAIRED_BRACKETS";
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/CompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/CompoundRule.java
new file mode 100644
index 0000000..d5260bf
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/CompoundRule.java
@@ -0,0 +1,55 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.sk;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.AbstractCompoundRule;
+
+/**
+ * Checks that compounds (if in the list) are not written as separate words.
+ *
+ * @author Zdenko Podobný based on code by Marcin Miłkowski, Daniel Naber
+ */
+
+public final class CompoundRule extends AbstractCompoundRule {
+
+  /** Name of the Slovak compound list, relative to the resource directory. */
+  private static final String FILE_NAME = "/sk/compounds.txt";
+
+  /**
+   * Loads the Slovak compound list and installs the Slovak texts for the
+   * short message and the three match messages.
+   *
+   * @param messages resource bundle handed on to the superclass
+   * @throws IOException declared for the loading of the compound file
+   */
+  public CompoundRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+    loadCompoundFile(
+        JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILE_NAME),
+        "UTF-8");
+    setShort("Problém spájania slov");
+    setMsg(
+        "Toto slovo sa zvyčajne píše so spojovníkom.",
+        "Toto slovo sa obvykle píše bez spojovníka.",
+        "Tento výraz sa bežne píše s alebo bez spojovníka.");
+  }
+
+  /** @return the rule identifier, "SK_COMPOUNDS" */
+  public String getId() {
+    return "SK_COMPOUNDS";
+  }
+
+  /** @return the Slovak description of this rule */
+  public String getDescription() {
+    return "Slová so spojovníkom napr. použite „česko-slovenský” namiesto „česko slovenský”";
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakRule.java
new file mode 100644
index 0000000..f28067a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakRule.java
@@ -0,0 +1,31 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.sk;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for Slovak rules.
+ *
+ * @author Zdenko Podobný based on Polish rules
+ *
+ */
+public abstract class SlovakRule extends Rule {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakVes.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakVes.java
new file mode 100644
index 0000000..3fff582
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sk/SlovakVes.java
@@ -0,0 +1,146 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Luboš Lehotský lubo.lehotsky (at) gmail (dot) com
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.sk;
+
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+
+public class SlovakVes extends SlovakRule {
+
+  /**
+   * Creates the rule, assigns it to the "category_misc" category when a
+   * message bundle is given, and switches it off by default.
+   *
+   * @param messages bundle that provides the category name; may be null
+   */
+  public SlovakVes(final ResourceBundle messages) {
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+    setDefaultOff();
+  }
+
+  @Override
+  public final String getId() {
+    return "SK_VES";
+  }
+
+  @Override
+  public final String getDescription() {
+    return "Názvy obcí, v ktorých je \"Ves\"";
+  }
+
+  /**
+   * Scans the sentence for a capitalized word with one of the checked POS
+   * tags, followed later by a lower-case word with one of those tags,
+   * followed later by "Ves", "Vsi" or "Vsou". For each such "Ves" form it
+   * suggests capitalizing the word directly in front of it.
+   *
+   * @param text the analyzed sentence
+   * @return the matches found, possibly an empty array
+   */
+  @Override
+  public final RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+    boolean capitalizedSeen = false;
+    boolean lowerCaseSeen = false;
+
+    // The token at index 0 is never inspected, so a sentence that consists
+    // of that token only simply yields no matches.
+    for (int i = 1; i < tokens.length; i++) {
+      final String token = tokens[i].getToken();
+      if (token.length() == 0) {
+        continue; // nothing to inspect, and charAt(0) would throw
+      }
+      final char first = token.charAt(0);
+      final boolean tagged = hasCheckedTag(tokens[i]);
+      if (tagged && Character.isUpperCase(first)) {
+        capitalizedSeen = true;
+      }
+      if (capitalizedSeen && tagged && Character.isLowerCase(first)) {
+        lowerCaseSeen = true;
+      }
+      // Report at the "Ves" token only. Keeping a sticky flag here would
+      // produce one more bogus match for every token after the first "Ves".
+      if (!lowerCaseSeen || !isVesForm(token)) {
+        continue;
+      }
+      final AnalyzedTokenReadings previous = tokens[i - 1];
+      final String word = previous.getToken();
+      if (word.length() == 0) {
+        continue;
+      }
+      final String capitalized = Character.toUpperCase(word.charAt(0)) + word.substring(1);
+      final String msg = "Zmeňte začiatočné písmeno na veľké: <suggestion> "
+          + capitalized + " </suggestion>";
+      final int from = previous.getStartPos();
+      ruleMatches.add(new RuleMatch(this, from, from + word.length(),
+          msg, "Zmeňte začiatočné písmeno na veľké: "));
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Tells whether the token carries one of the POS tags this rule looks at.
+   * NOTE(review): the tags look like feminine singular adjectives in cases
+   * 1-4, 6 and 7 - confirm against the Slovak tag set.
+   */
+  private boolean hasCheckedTag(final AnalyzedTokenReadings readings) {
+    return readings.hasPosTag("AAfs1x") || readings.hasPosTag("AAfs2x")
+        || readings.hasPosTag("AAfs3x")
+        || readings.hasPosTag("AAfs4x")
+        || readings.hasPosTag("AAfs6x")
+        || readings.hasPosTag("AAfs7x");
+  }
+
+  /** Tells whether the token is one of the forms of "Ves" this rule reacts to. */
+  private boolean isVesForm(final String token) {
+    return token.equals("Ves") || token.equals("Vsi") || token.equals("Vsou");
+  }
+
+  @Override
+  public void reset() {
+    // nothing to reset: the rule keeps no state between sentences
+  }
+
+}
+
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/CompoundRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/CompoundRule.java
new file mode 100644
index 0000000..b3087cd
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/CompoundRule.java
@@ -0,0 +1,247 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.sv;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.ResourceBundle;
+import java.util.Set;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.Category;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Checks that compounds (if in the list) are not written as separate words.
+ *
+ * @author Daniel Naber
+ */
+public class CompoundRule extends SwedishRule {
+  //TODO for words with more than one part, check whether only some of the parts are compounded,
+  //e.g. allt-i-genom+ should match "allt i genom", "allt igenom" as well as "allti genom"
+  private static final String FILE_NAME = "/sv/compounds.txt";
+
+  /** Largest number of words a compound may consist of. */
+  private static final int MAX_TERMS = 5;
+
+  /** Lower-case compounds written as separate words, i.e. the incorrect spellings. */
+  private final Set<String> incorrectCompounds = new HashSet<String>();
+  /** Entries marked with "+" in the file: the hyphenated form is not suggested. */
+  private final Set<String> noDashSuggestion = new HashSet<String>();
+  /** Entries marked with "*" in the file: only the hyphenated form is suggested. */
+  private final Set<String> onlyDashSuggestion = new HashSet<String>();
+
+  /**
+   * Creates the rule and loads the Swedish compound list.
+   *
+   * @param messages bundle that provides the category name; may be null
+   * @throws IOException if the compound list cannot be read or contains an
+   *         entry with one word or with more than {@code MAX_TERMS} words
+   */
+  public CompoundRule(final ResourceBundle messages) throws IOException {
+    if (messages != null) {
+      super.setCategory(new Category(messages.getString("category_misc")));
+    }
+    loadCompoundFile(JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILE_NAME), "UTF-8");
+  }
+
+  @Override
+  public String getId() {
+    return "SV_COMPOUNDS";
+  }
+
+  @Override
+  public String getDescription() {
+    return "Särskrivningar, t.ex. 'cd rom' bör skrivas 'cd-rom'";
+  }
+
+  /**
+   * Slides a window of up to {@code MAX_TERMS} tokens over the sentence and
+   * reports every word sequence that is listed as a compound but written as
+   * separate words.
+   *
+   * @param text the analyzed sentence
+   * @return the matches found, possibly an empty array
+   */
+  @Override
+  public RuleMatch[] match(final AnalyzedSentence text) {
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+    RuleMatch prevRuleMatch = null;
+    final Queue<AnalyzedTokenReadings> prevTokens = new ArrayBlockingQueue<AnalyzedTokenReadings>(MAX_TERMS);
+    for (int i = 0; i < tokens.length + MAX_TERMS - 1; i++) {
+      final AnalyzedTokenReadings token;
+      // we need to extend the token list so we find matches at the end of the original list:
+      if (i >= tokens.length) {
+        token = new AnalyzedTokenReadings(new AnalyzedToken("", "", null), prevTokens.peek().getStartPos());
+      } else {
+        token = tokens[i];
+      }
+      if (i == 0) {
+        addToQueue(token, prevTokens);
+        continue;
+      }
+
+      final StringBuilder sb = new StringBuilder();
+      int j = 0;
+      AnalyzedTokenReadings firstMatchToken = null;
+      final List<String> stringsToCheck = new ArrayList<String>();
+      final List<String> origStringsToCheck = new ArrayList<String>(); // original upper/lowercase spelling
+      final Map<String, AnalyzedTokenReadings> stringToToken = new HashMap<String, AnalyzedTokenReadings>();
+      for (AnalyzedTokenReadings atr : prevTokens) {
+        if (j == 0) {
+          firstMatchToken = atr;
+        }
+        sb.append(' ');
+        sb.append(atr.getToken());
+        if (j >= 1) {
+          final String stringToCheck = normalize(sb.toString());
+          stringsToCheck.add(stringToCheck);
+          origStringsToCheck.add(sb.toString().trim());
+          if (!stringToToken.containsKey(stringToCheck)) {
+            stringToToken.put(stringToCheck, atr);
+          }
+        }
+        j++;
+      }
+      // iterate backwards over all potentially incorrect strings to make
+      // sure we match longer strings first:
+      for (int k = stringsToCheck.size() - 1; k >= 0; k--) {
+        final String stringToCheck = stringsToCheck.get(k);
+        final String origStringToCheck = origStringsToCheck.get(k);
+        if (!incorrectCompounds.contains(stringToCheck)) {
+          continue;
+        }
+        final AnalyzedTokenReadings atr = stringToToken.get(stringToCheck);
+        String msg = null;
+        final List<String> repl = new ArrayList<String>();
+        if (!noDashSuggestion.contains(stringToCheck)) {
+          repl.add(origStringToCheck.replace(' ', '-'));
+          msg = "Dessa ord skrivs samman med bindesträck.";
+        }
+        // Do not assume that compounds with more than two parts should always use hyphens:
+        if (!hasUppercasePart(origStringToCheck) && !onlyDashSuggestion.contains(stringToCheck)) {
+          repl.add(mergeCompound(origStringToCheck));
+          msg = "Dessa ord skrivs samman.";
+        }
+        final String[] parts = stringToCheck.split(" ");
+        // A bare "parts.length > 0" test is always true and would force the
+        // hyphen-only suggestion for every entry, ignoring the "+" and "*"
+        // markers. Only a single-letter first part (as in "e post") is
+        // restricted to the hyphenated form.
+        if (parts.length > 0 && parts[0].length() == 1) {
+          repl.clear();
+          repl.add(origStringToCheck.replace(' ', '-'));
+          msg = "Dessa ord skrivs samman med bindesträck.";
+        } else if (repl.size() == 0 || repl.size() == 2) { // == 0 shouldn't happen
+          // both spellings are offered; the list is rebuilt explicitly
+          msg = "Dessa ord skrivs samman med eller utan bindesträck.";
+          repl.clear();
+          repl.add(origStringToCheck.replace(' ', '-'));
+          repl.add(mergeCompound(origStringToCheck));
+        }
+        final RuleMatch ruleMatch = new RuleMatch(this, firstMatchToken.getStartPos(),
+            atr.getStartPos() + atr.getToken().length(), msg);
+        // avoid duplicate matches: a match that starts where the previous
+        // one started is remembered but not reported again
+        final boolean duplicate = prevRuleMatch != null
+            && prevRuleMatch.getFromPos() == ruleMatch.getFromPos();
+        prevRuleMatch = ruleMatch;
+        if (!duplicate) {
+          ruleMatch.setSuggestedReplacements(repl);
+          ruleMatches.add(ruleMatch);
+        }
+        break;
+      }
+      addToQueue(token, prevTokens);
+    }
+    return toRuleMatchArray(ruleMatches);
+  }
+
+  /**
+   * Trims and lower-cases the string; if it contains both a dash and a
+   * space, the dashes are replaced with spaces,
+   * e.g. "E-Mail Adresse" -> "e mail adresse", so the error can be detected.
+   *
+   * @param str the words to normalize, separated by spaces
+   * @return the normalized string
+   */
+  private String normalize(String str) {
+    str = str.trim().toLowerCase();
+    if (str.indexOf('-') != -1 && str.indexOf(' ') != -1) {
+      str = str.replace('-', ' ');
+    }
+    return str;
+  }
+
+  /**
+   * Tells whether at least one of the space-separated parts is written in
+   * upper case only; such strings are not merged into a single word.
+   */
+  private boolean hasUppercasePart(String str) {
+    final String[] parts = str.split(" ");
+    for (String part : parts) {
+      if (StringTools.isAllUppercase(part)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Joins the space-separated parts into one word. The first part keeps its
+   * spelling, all following parts are lower-cased.
+   */
+  private String mergeCompound(String str) {
+    final String[] stringParts = str.split(" ");
+    final StringBuilder sb = new StringBuilder();
+    for (int k = 0; k < stringParts.length; k++) {
+      if (k == 0) {
+        sb.append(stringParts[k]);
+      } else {
+        sb.append(stringParts[k].toLowerCase());
+      }
+    }
+    return sb.toString();
+  }
+
+  /** Appends the token to the window, dropping the oldest token if the window is full. */
+  private void addToQueue(AnalyzedTokenReadings token, Queue<AnalyzedTokenReadings> prevTokens) {
+    final boolean inserted = prevTokens.offer(token);
+    if (!inserted) {
+      prevTokens.poll();
+      prevTokens.offer(token);
+    }
+  }
+
+  /**
+   * Reads the compound list. Empty lines and lines starting with "#" are
+   * ignored. Dashes are replaced with spaces, a trailing "+" or "*" is cut
+   * off and recorded in the matching set, and every entry is stored in
+   * lower case.
+   *
+   * @param file stream to read from; it is closed when this method returns
+   * @param encoding character encoding of the stream
+   * @throws IOException if reading fails, or if an entry consists of one
+   *         word only or of more than {@code MAX_TERMS} words
+   */
+  private void loadCompoundFile(final InputStream file, final String encoding) throws IOException {
+    // closing the BufferedReader also closes the wrapped reader and stream
+    final BufferedReader br = new BufferedReader(new InputStreamReader(file, encoding));
+    try {
+      String line;
+      while ((line = br.readLine()) != null) {
+        line = line.trim();
+        if (line.length() < 1) {
+          continue;
+        }
+        if (line.charAt(0) == '#') { // ignore comments
+          continue;
+        }
+        // the set contains the incorrect spellings, i.e. the ones without hyphen
+        line = line.replace('-', ' ');
+        final String[] parts = line.split(" ");
+        if (parts.length > MAX_TERMS) {
+          throw new IOException("För många ord sammansatta: " + line + ", max antal tillåtna ord: " + MAX_TERMS);
+        }
+        if (parts.length == 1) {
+          throw new IOException("Inget sammansatt ord: " + line);
+        }
+        if (line.endsWith("+")) {
+          line = line.substring(0, line.length() - 1); // cut off "+"
+          noDashSuggestion.add(line.toLowerCase());
+        } else if (line.endsWith("*")) {
+          line = line.substring(0, line.length() - 1); // cut off "*"
+          onlyDashSuggestion.add(line.toLowerCase());
+        }
+        incorrectCompounds.add(line.toLowerCase());
+      }
+    } finally {
+      br.close();
+    }
+  }
+
+  @Override
+  public void reset() {
+    // nothing to reset: the rule keeps no state between sentences
+  }
+
+}
+
+
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/SwedishRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/SwedishRule.java
new file mode 100644
index 0000000..73af8fe
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/sv/SwedishRule.java
@@ -0,0 +1,31 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.sv;
+
+import de.danielnaber.languagetool.rules.Rule;
+
+/**
+ * Abstract base class for Swedish rules. It declares no members of its own;
+ * everything is inherited from {@link Rule}.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public abstract class SwedishRule extends Rule {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/PunctuationCheckRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/PunctuationCheckRule.java
new file mode 100644
index 0000000..5abc339
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/PunctuationCheckRule.java
@@ -0,0 +1,76 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.uk;
+
+import java.util.ResourceBundle;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.rules.AbstractPunctuationCheckRule;
+
+/**
+ * A rule that matches "..", "::", "-," but not "...", "!..", "?!!", ",-" etc
+ * TODO: spaces seem to be special, extract from regexp?
+ *
+ * @author Andriy Rysin
+ */
+public class PunctuationCheckRule extends AbstractPunctuationCheckRule {
+
+  // The patterns are compiled once instead of on every call, which is what
+  // String.matches() would do.
+
+  /** Accepted sequences of internal punctuation; duplicated spaces are tolerated. */
+  private static final Pattern INTERNAL_PUNCTUATION =
+      Pattern.compile("([,:] | *- |,- | ) *");
+
+  /** Accepted sequences of final punctuation such as ".", "!!!", "?!!" or "...". */
+  private static final Pattern FINAL_PUNCTUATION =
+      Pattern.compile("([.!?]|!!!|\\?\\?\\?|\\?!!|!\\.\\.|\\?\\.\\.|\\.\\.\\.) *");
+
+  /** A single character out of ". , ! ? : - " and the space. */
+  private static final Pattern PUNCTUATION_CHAR = Pattern.compile("^[.,!?: -]$");
+
+  /**
+   * Creates the rule.
+   *
+   * @param messages resource bundle handed on to the superclass
+   */
+  public PunctuationCheckRule(final ResourceBundle messages) {
+    super(messages);
+  }
+
+  /**
+   * Tells whether a run of punctuation tokens is acceptable.
+   *
+   * @param tokens the joined punctuation tokens
+   * @return {@code true} if the whole string is an accepted internal or
+   *         final punctuation sequence
+   */
+  protected final boolean isPunctsJoinOk(final String tokens) {
+    // we ignore duplicated spaces - too many errors
+    return INTERNAL_PUNCTUATION.matcher(tokens).matches() // internal punctuation
+        || FINAL_PUNCTUATION.matcher(tokens).matches();
+  }
+
+  /**
+   * Tells whether a token is a punctuation character for this rule.
+   *
+   * @param token the token text
+   * @return {@code true} if the token is exactly one of the characters
+   *         '.', ',', '!', '?', ':', ' ' or '-'
+   */
+  protected final boolean isPunctuation(final String token) {
+    return PUNCTUATION_CHAR.matcher(token).matches();
+  }
+
+  /** Does nothing. */
+  public void reset() {
+    // nothing
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/SimpleReplaceRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/SimpleReplaceRule.java
new file mode 100644
index 0000000..3bba01c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/uk/SimpleReplaceRule.java
@@ -0,0 +1,50 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.uk;
+
+import java.io.IOException;
+import java.util.ResourceBundle;
+
+import de.danielnaber.languagetool.rules.AbstractSimpleReplaceRule;
+
+/**
+ * A rule that matches words or phrases which should not be used and suggests
+ * correct ones instead.
+ *
+ * Ukrainian implementations. Loads the
+ * relevant words from <code>rules/uk/replace.txt</code>.
+ *
+ * @author Andriy Rysin
+ */
+public class SimpleReplaceRule extends AbstractSimpleReplaceRule {
+
+  /** Name of the Ukrainian replacement list. */
+  private static final String FILE_NAME = "/uk/replace.txt";
+
+  /**
+   * Creates the rule.
+   *
+   * @param messages resource bundle handed on to the superclass
+   * @throws IOException declared by the superclass constructor
+   */
+  public SimpleReplaceRule(final ResourceBundle messages) throws IOException {
+    super(messages);
+  }
+
+  /** @return the name of the replacement list, "/uk/replace.txt" */
+  public final String getFileName() {
+    return FILE_NAME;
+  }
+
+  /** @return the rule identifier, "UK_SIMPLE_REPLACE" */
+  public final String getId() {
+    return "UK_SIMPLE_REPLACE";
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/server/HTTPServer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/server/HTTPServer.java
new file mode 100644
index 0000000..7e1dc99
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/server/HTTPServer.java
@@ -0,0 +1,341 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.server;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import com.prolixtech.jaminid.ContentOracle;
+import com.prolixtech.jaminid.Daemon;
+import com.prolixtech.jaminid.ProtocolResponseHeader;
+import com.prolixtech.jaminid.Request;
+import com.prolixtech.jaminid.Response;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A small embedded HTTP server that checks text. Returns XML, prints debugging
+ * to stdout/stderr.
+ *
+ * @author Daniel Naber
+ */
+public class HTTPServer extends ContentOracle {
+
+ /** The default port on which the server is running (8081). */
+ public static final int DEFAULT_PORT = 8081;
+
+ /** Context size, in characters, passed on when rule matches are converted to XML. */
+ private static final int CONTEXT_SIZE = 40;
+
+ /**
+  * JLanguageTool instances for each language (created and configured on first use).
+  * The outer map is keyed by the language being checked, the inner map by the
+  * user's mother tongue, so each language/mother-tongue pair has its own instance.
+  */
+ private static final Map<Language, Map<Language, JLanguageTool>> instances = new HashMap<Language, Map<Language, JLanguageTool>>();
+
+ /**
+  * Remote addresses that may use the server: only requests from localhost are accepted.
+  * TODO: find a cleaner solution
+  */
+ private static final Set<String> allowedIPs = new HashSet<String>(Arrays.asList(
+     "/0:0:0:0:0:0:0:1", // Suse Linux IPv6 stuff
+     "/0:0:0:0:0:0:0:1%0", // some(?) Mac OS X
+     "/127.0.0.1"));
+
+ /** The jaminid daemon; assigned by run(). */
+ private Daemon daemon;
+
+ /** Port the server listens on. */
+ private int port = DEFAULT_PORT;
+
+ /** If true, the text being checked is printed when an exception occurs. */
+ private boolean verbose;
+
+ /**
+  * Prepare a server on the default port ({@link #DEFAULT_PORT}) with verbose
+  * output switched off - use run() to start it.
+  */
+ public HTTPServer() {
+   this(DEFAULT_PORT, false);
+ }
+
+ /**
+  * Prepare a server on the given port with verbose output switched off - use
+  * run() to start it.
+  *
+  * @param port the port to listen on
+  */
+ public HTTPServer(int port) {
+   this(port, false);
+ }
+
+ /**
+  * Prepare a server on the given port - use run() to start it.
+  *
+  * @param port
+  *          the port to listen on
+  * @param verbose
+  *          if true, the text to check will be displayed in case of exceptions
+  *          (default: false)
+  */
+ public HTTPServer(int port, boolean verbose) {
+   this.verbose = verbose;
+   this.port = port;
+ }
+
+ /**
+  * Start the server: creates the daemon for the configured port and reports
+  * the outcome on stdout.
+  *
+  * @throws PortBindingException if the daemon is not running after creation
+  */
+ public void run() {
+   System.out.println("Starting server on port " + port + "...");
+   daemon = new Daemon(port, this);
+   if (!daemon.isRunning()) {
+     throw new PortBindingException(
+         "LanguageTool server could not be started " + "on port " + port
+         + ", maybe something else is running on that port already?");
+   }
+   System.out.println("Server started");
+ }
+
+ /**
+  * Handles a single HTTP request and produces the response body.
+  * <p>
+  * Requests with an empty location, or from an address that is not contained
+  * in <code>allowedIPs</code>, are refused with status 403. A location ending
+  * in <code>/Languages</code> returns the list of supported languages. Every
+  * other location is treated as a check request that reads the parameters
+  * <code>language</code> (required), <code>motherTongue</code> (optional) and
+  * <code>text</code> (required). All other failures are answered with status 500.
+  * <p>
+  * The whole request is processed while holding the lock on
+  * <code>instances</code>, so only one thread at a time executes this code.
+  *
+  * @param connRequest the incoming request
+  * @param connResponse the response; its status and headers are set here
+  * @return the XML result, or a text starting with "Error: " if the request failed
+  */
+ public String demultiplex(Request connRequest, Response connResponse) {
+   synchronized (instances) {
+     final long timeStart = System.currentTimeMillis();
+     String text = null;
+     // Status sent when the request fails. It is kept in a local variable and
+     // applied only in the catch block, so that an "access denied" (403) is
+     // not overwritten by the generic 500.
+     int errorStatus = 500;
+     try {
+       if (StringTools.isEmpty(connRequest.getLocation())) {
+         errorStatus = 403;
+         throw new RuntimeException("Error: Access to "
+             + connRequest.getLocation() + " denied");
+       }
+       if (!allowedIPs.contains(connRequest.getIPAddressString())) {
+         errorStatus = 403;
+         throw new RuntimeException("Error: Access from "
+             + connRequest.getIPAddressString() + " denied");
+       }
+       // TODO: temporary fix until jaminid bug is fixed (it seems that non-ASCII characters are not handled correctly)
+       // see https://sourceforge.net/tracker/?func=detail&aid=2876507&group_id=127764&atid=709370
+       fixRequestParamMap(connRequest);
+
+       // return content based on the request string.
+       // Refactor this when the number of known request types gets too big.
+
+       // request type: list known languages
+       if (connRequest.getLocation().endsWith("/Languages")) {
+         connResponse.setHeaderLine(ProtocolResponseHeader.Content_Type, "text/xml");
+         connResponse.setHeaderLine(ProtocolResponseHeader.Content_Encoding, "UTF-8");
+         return getSupportedLanguagesAsXML();
+       }
+
+       // request type: grammar checking (default type)
+       final String langParam = connRequest.getParamOrNull("language");
+       if (langParam == null) {
+         throw new IllegalArgumentException("Missing 'language' parameter");
+       }
+       final Language lang = Language.getLanguageForShortName(langParam);
+       if (lang == null) {
+         throw new IllegalArgumentException("Unknown language '" + langParam
+             + "'");
+       }
+       final String motherTongueParam = connRequest.getParamOrNull("motherTongue");
+       Language motherTongue = null;
+       if (null != motherTongueParam) {
+         motherTongue = Language.getLanguageForShortName(motherTongueParam);
+       }
+       final JLanguageTool lt = getLanguageToolInstance(lang, motherTongue);
+       // TODO: how to take options from the client?
+       // TODO: customize lt here after reading client options
+       text = connRequest.getParamOrNull("text");
+       if (text == null) {
+         throw new IllegalArgumentException("Missing 'text' parameter");
+       }
+       print("Checking " + text.length() + " characters of text, language "
+           + langParam);
+       final List<RuleMatch> matches = lt.check(text);
+       connResponse.setHeaderLine(ProtocolResponseHeader.Content_Type,
+           "text/xml");
+       // TODO: how to set the encoding to utf-8 if we can just return a
+       // String?
+       connResponse.setHeaderLine(ProtocolResponseHeader.Content_Encoding,
+           "UTF-8");
+       final String response = StringTools.ruleMatchesToXML(matches, text,
+           CONTEXT_SIZE, StringTools.XmlPrintMode.NORMAL_XML);
+       print("Check done in " + (System.currentTimeMillis() - timeStart)
+           + "ms");
+       return response;
+     } catch (Exception e) {
+       if (verbose) {
+         print("Exceptions was caused by this text: " + text);
+       }
+       e.printStackTrace();
+       connResponse.setStatus(errorStatus);
+       // escape input to avoid XSS attacks:
+       return "Error: " + StringTools.escapeXML(e.toString());
+     }
+   }
+ }
+
+ /**
+  * Writes a message to stdout, prefixed with the current date.
+  *
+  * @param s the message to print
+  */
+ private void print(String s) {
+   final String line = getDate() + " " + s;
+   System.out.println(line);
+ }
+
+ /**
+  * Formats the current time with a default-configured SimpleDateFormat.
+  * A new formatter is created on every call.
+  *
+  * @return the current date and time as text
+  */
+ private String getDate() {
+   return new SimpleDateFormat().format(new Date());
+ }
+
+ /**
+  * Stop the server process. Does nothing if run() has not been called yet,
+  * because no daemon exists in that case.
+  */
+ public void stop() {
+   if (daemon == null) {
+     // run() has not created a daemon, so there is nothing to tear down
+     return;
+   }
+   System.out.println("Stopping server...");
+   daemon.tearDown();
+   System.out.println("Server stopped");
+ }
+
+ /**
+  * Prints the command line syntax to stdout and terminates the JVM with
+  * exit code 1.
+  */
+ private static void printUsageAndExit() {
+   // list every option that main() understands, including -v|--verbose
+   System.out.println("Usage: HTTPServer [-v|--verbose] [-p|--port port]");
+   System.exit(1);
+ }
+
+ /**
+  * Replaces the parameter map of the request with one that was decoded by
+  * {@link #getParamMap(Request)}.
+  * Workaround until the jaminid bug is fixed (it seems that non-ASCII characters are not handled correctly),
+  * see https://sourceforge.net/tracker/?func=detail&aid=2876507&group_id=127764&atid=709370
+  *
+  * @param connRequest the Request object from jaminid ContentOracle.
+  * @throws UnsupportedEncodingException If character encoding needs to be consulted, but named character encoding is not supported.
+  */
+ private void fixRequestParamMap(final Request connRequest) throws UnsupportedEncodingException {
+   // decode first, then swap the content of the request's own map
+   final Map<String, String> decodedParams = getParamMap(connRequest);
+   connRequest.getParamMap().clear();
+   connRequest.getParamMap().putAll(decodedParams);
+ }
+
+ /**
+  * Extracts the request parameters from the raw request string.
+  * Workaround until the jaminid bug is fixed (it seems that non-ASCII characters are not handled correctly),
+  * see https://sourceforge.net/tracker/?func=detail&aid=2876507&group_id=127764&atid=709370
+  * The default implementation can't handle the UTF-8 characters (like șțîâ),
+  * so the values are decoded with URLDecoder.decode() instead of the default
+  * private unescape method. Parameter names are stored as they appear in the
+  * request; a component without a name/value separator is stored with an
+  * empty value.
+  *
+  * @param connRequest the Request object from jaminid ContentOracle.
+  * @return the parameters map (empty if there is no request or no parameter string).
+  * @throws UnsupportedEncodingException If character encoding needs to be consulted, but named character encoding is not supported
+  */
+ private Map<String, String> getParamMap(Request connRequest) throws UnsupportedEncodingException {
+   final Map<String, String> params = new HashMap<String, String>();
+   if (connRequest == null) {
+     return params;
+   }
+   // a non-empty body is used as parameter source (POST), otherwise the parameter string (GET)
+   final String rawParams = StringTools.isEmpty(connRequest.getBody())
+       ? connRequest.getParamString()
+       : connRequest.getBody();
+   if (StringTools.isEmpty(rawParams)) {
+     return params;
+   }
+
+   for (final String pair : rawParams.split("&")) {
+     final int separator = pair.indexOf('=');
+     if (separator <= 0) {
+       params.put(pair, "");
+       continue;
+     }
+     // TODO: Find some way to determine the encoding used on client-side
+     // maybe "Accept-Charset" request header could be used.
+     // UTF-8 will work on most platforms and browsers.
+     final String name = pair.substring(0, separator);
+     final String value = URLDecoder.decode(pair.substring(separator + 1), "UTF-8");
+     params.put(name, value);
+   }
+   return params;
+ }
+
+ /**
+  * Find or create a JLanguageTool instance for a specific language and mother tongue.
+  * Instances are cached in <code>instances</code> and reused. A new instance
+  * gets the default pattern rules and the default false friend rules
+  * activated before it is cached. If any further customization is required
+  * (like disabled rules), it has to be done after acquiring the instance.
+  *
+  * @param lang the language to be used.
+  * @param motherTongue the user's mother tongue or <code>null</code>
+  *          (<code>null</code> is used as a regular key of the per-language map)
+  * @return a JLanguageTool instance for a specific language and mother tongue.
+  * @throws Exception when JLanguageTool creation failed
+  */
+ private JLanguageTool getLanguageToolInstance(Language lang, Language motherTongue)
+     throws Exception {
+   Map<Language, JLanguageTool> byMotherTongue = instances.get(lang);
+   if (byMotherTongue == null) {
+     // first call using this language
+     byMotherTongue = new HashMap<Language, JLanguageTool>();
+     instances.put(lang, byMotherTongue);
+   }
+   final JLanguageTool cached = byMotherTongue.get(motherTongue);
+   if (cached != null) {
+     return cached;
+   }
+   final String motherTongueInfo = (motherTongue == null) ? "" : (" and mother tongue " + motherTongue);
+   print("Creating JLanguageTool instance for language " + lang + motherTongueInfo);
+   final JLanguageTool created = new JLanguageTool(lang, motherTongue);
+   created.activateDefaultPatternRules();
+   created.activateDefaultFalseFriendRules();
+   // cache only after the rules were activated successfully
+   byMotherTongue.put(motherTongue, created);
+   return created;
+ }
+
+ /**
+  * Construct an xml string containing all supported languages. <br/>The xml format is:<br/>
+  * &lt;languages&gt;<br/>
+  * &nbsp;&nbsp;&lt;language name="Catalan" abbr="ca" /&gt;<br/>
+  * &nbsp;&nbsp;&lt;language name="Dutch" abbr="nl" /&gt;<br/>
+  * &nbsp;&nbsp;...<br/>
+  * &lt;/languages&gt;<br/>
+  * The languages are alphabetically sorted by name. The sorting is done on a
+  * copy, so <code>Language.REAL_LANGUAGES</code> itself is left untouched.
+  * @return an xml string containing all supported languages.
+  */
+ public static String getSupportedLanguagesAsXML() {
+   // Arrays.asList() only wraps the array, so sorting such a list would
+   // reorder the shared Language.REAL_LANGUAGES array in place. Sort a clone.
+   final Language[] languages = Language.REAL_LANGUAGES.clone();
+   Arrays.sort(languages,
+       new Comparator<Language>() {
+         public int compare(Language o1, Language o2) {
+           return o1.getName().compareTo(o2.getName());
+         }
+       });
+   final StringBuilder xmlBuffer = new StringBuilder("<?xml version='1.0' encoding='UTF-8'?>\n<languages>\n");
+   for (Language lang : languages) {
+     xmlBuffer.append(String.format("\t<language name=\"%s\" abbr=\"%s\" /> \n", lang.getName(), lang.getShortName()));
+   }
+   xmlBuffer.append("</languages>\n");
+   return xmlBuffer.toString();
+ }
+
+ /**
+  * Start the server from command line. Usage:
+  * <tt>HTTPServer [-v|--verbose] [-p|--port port]</tt>
+  * <p>
+  * The usage message is printed and the program exits if more than three
+  * arguments are given, if the port option has no value, or if its value is
+  * not a number. Arguments that are not recognized are ignored.
+  *
+  * @param args the command line arguments
+  */
+ public static void main(String[] args) {
+   if (args.length > 3) {
+     printUsageAndExit();
+   }
+   boolean verbose = false;
+   int port = DEFAULT_PORT;
+   for (int i = 0; i < args.length; i++) {
+     if ("-p".equals(args[i]) || "--port".equals(args[i])) {
+       if (i + 1 >= args.length) {
+         // the port option is the last argument: its value is missing
+         printUsageAndExit();
+         return;
+       }
+       try {
+         port = Integer.parseInt(args[++i]);
+       } catch (NumberFormatException e) {
+         // the port value is not a number
+         printUsageAndExit();
+         return;
+       }
+     } else if ("-v".equals(args[i]) || "--verbose".equals(args[i])) {
+       verbose = true;
+     }
+   }
+   final HTTPServer server = new HTTPServer(port, verbose);
+   server.run();
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/server/PortBindingException.java b/JLanguageTool/src/java/de/danielnaber/languagetool/server/PortBindingException.java
new file mode 100644
index 0000000..de3ae56
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/server/PortBindingException.java
@@ -0,0 +1,36 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.server;
+
+/**
+ * Unchecked exception used by the LanguageTool HTTP server to report that it
+ * could not be started on the requested port.
+ */
+public class PortBindingException extends RuntimeException {
+
+  /** Version identifier used by Java serialization. */
+  private static final long serialVersionUID = -8416700513887041339L;
+
+  /**
+   * @param message description of the problem
+   * @param cause the underlying problem
+   */
+  PortBindingException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  /**
+   * @param message description of the problem
+   */
+  PortBindingException(String message) {
+    super(message);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/BaseSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/BaseSynthesizer.java
new file mode 100644
index 0000000..49ce47c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/BaseSynthesizer.java
@@ -0,0 +1,87 @@
+package de.danielnaber.languagetool.synthesis;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.tools.Tools;
+
+/**
+ * Dictionary-based word form synthesizer. The dictionary is queried with keys
+ * of the form <code>lemma|posTag</code>; both the dictionary and the list of
+ * known tags are loaded lazily on first use.
+ */
+public class BaseSynthesizer implements Synthesizer {
+
+  /** Dictionary lookup, created on first use. Protected so that subclasses can initialise it themselves. */
+  protected IStemmer synthesizer;
+
+  /** All tags read from the tags file; loaded on the first regular expression request. */
+  private ArrayList<String> possibleTags;
+
+  private final String tagsFileName;
+  private final String resourceFileName;
+
+  /**
+   * @param resFile name of the dictionary resource, resolved with
+   *          <code>getClass().getResource()</code>
+   * @param tagFile name of the file listing the possible tags, opened with
+   *          <code>Tools.getStream()</code>
+   */
+  public BaseSynthesizer(final String resFile, final String tagFile) {
+    tagsFileName = tagFile;
+    resourceFileName = resFile;
+  }
+
+  /**
+   * Get the forms of a given AnalyzedToken, where the form is defined by a
+   * part-of-speech tag.
+   *
+   * @param token
+   *          AnalyzedToken to be inflected.
+   * @param posTag
+   *          A desired part-of-speech tag.
+   * @return the inflected word forms found in the dictionary for the token's
+   *         lemma and the given tag.
+   * @throws IOException if the dictionary cannot be found or read
+   */
+  public String[] synthesize(final AnalyzedToken token, final String posTag)
+      throws IOException {
+    ensureSynthesizerLoaded();
+    final List<String> wordForms = new ArrayList<String>();
+    collectForms(token, posTag, wordForms);
+    return wordForms.toArray(new String[wordForms.size()]);
+  }
+
+  /**
+   * Get the forms of a given AnalyzedToken for every known tag that matches
+   * a regular expression.
+   *
+   * @param token
+   *          AnalyzedToken to be inflected.
+   * @param posTag
+   *          A desired part-of-speech tag, or a regular expression that has to
+   *          match a complete tag if <code>posTagRegExp</code> is true.
+   * @param posTagRegExp
+   *          whether <code>posTag</code> is a regular expression; if false,
+   *          the call is forwarded to {@link #synthesize(AnalyzedToken, String)}.
+   * @return the inflected word forms, in the order of the matching tags in
+   *         the tags file.
+   * @throws IOException if the tags file or the dictionary cannot be found or read
+   */
+  public String[] synthesize(final AnalyzedToken token, final String posTag,
+      final boolean posTagRegExp) throws IOException {
+    if (!posTagRegExp) {
+      return synthesize(token, posTag);
+    }
+    if (possibleTags == null) {
+      possibleTags = SynthesizerTools.loadWords(Tools
+          .getStream(tagsFileName));
+    }
+    ensureSynthesizerLoaded();
+    final Pattern p = Pattern.compile(posTag);
+    final List<String> results = new ArrayList<String>();
+    for (final String tag : possibleTags) {
+      final Matcher m = p.matcher(tag);
+      if (m.matches()) {
+        collectForms(token, tag, results);
+      }
+    }
+    return results.toArray(new String[results.size()]);
+  }
+
+  /**
+   * @param posTag the original POS tag
+   * @return the tag unchanged; this implementation applies no correction
+   */
+  public String getPosTagCorrection(final String posTag) {
+    return posTag;
+  }
+
+  /** Creates the dictionary lookup unless it has been set already. */
+  private void ensureSynthesizerLoaded() throws IOException {
+    if (synthesizer != null) {
+      return;
+    }
+    final URL url = this.getClass().getResource(resourceFileName);
+    if (url == null) {
+      // getResource() signals a missing resource with null; report it clearly
+      throw new IOException("Synthesizer dictionary not found: " + resourceFileName);
+    }
+    synthesizer = new DictionaryLookup(Dictionary.read(url));
+  }
+
+  /** Looks up <code>lemma|tag</code> and appends every form found to <code>forms</code>. */
+  private void collectForms(final AnalyzedToken token, final String tag,
+      final List<String> forms) {
+    final List<WordData> wordData = synthesizer.lookup(token.getLemma() + "|" + tag);
+    for (WordData wd : wordData) {
+      forms.add(wd.getStem().toString());
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/Synthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/Synthesizer.java
new file mode 100644
index 0000000..359bb20
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/Synthesizer.java
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+
+/**
+ * Part-of-speech synthesizer interface. Implementations are
+ * heavily language-dependent.
+ *
+ * @author Marcin Miłkowski
+ */
+public interface Synthesizer {
+
+  /**
+   * Generates a form of the word with a given POS tag for a given lemma.
+   *
+   * @param token the token to be used for synthesis
+   * @param posTag POS tag of the form to be generated.
+   * @return the generated word forms
+   * @throws IOException declared for implementations that have to read data
+   */
+  String[] synthesize(AnalyzedToken token, String posTag) throws IOException;
+
+  /**
+   * Generates a form of the word with a given POS tag for a given lemma.
+   * POS tag can be specified using regular expressions.
+   *
+   * @param token the token to be used for synthesis
+   * @param posTag POS tag of the form to be generated.
+   * @param posTagRegExp Specifies whether the posTag string is a
+   *          regular expression.
+   * @return the generated word forms
+   * @throws IOException declared for implementations that have to read data
+   */
+  String[] synthesize(AnalyzedToken token, String posTag, boolean posTagRegExp) throws IOException;
+
+  /**
+   * Gets a corrected version of the POS tag used for synthesis.
+   * Useful when the tagset defines special disjunctions that
+   * need to be converted into regexp disjunctions.
+   *
+   * @param posTag Original POS tag to correct.
+   * @return Converted POS tag.
+   */
+  String getPosTagCorrection(String posTag);
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/SynthesizerTools.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/SynthesizerTools.java
new file mode 100644
index 0000000..f91614e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/SynthesizerTools.java
@@ -0,0 +1,64 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2008 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+
+/**
+ * Helper methods shared by the synthesizers.
+ */
+public class SynthesizerTools {
+
+  private SynthesizerTools() {
+    // static methods only, no public constructor
+  }
+
+  /**
+   * Reads a word list, one entry per line. Every line is trimmed; empty lines
+   * and lines starting with <code>#</code> (comments) are skipped. The stream
+   * is always decoded as UTF-8, so the result does not depend on the platform
+   * default encoding, and it is closed before the method returns.
+   *
+   * @param file the stream to read from
+   * @return the entries in the order in which they appear in the stream
+   * @throws IOException if reading from the stream fails
+   */
+  public static ArrayList<String> loadWords(final InputStream file) throws IOException {
+    final ArrayList<String> words = new ArrayList<String>();
+    // explicit charset: the one-argument constructor would use the platform default
+    final BufferedReader reader = new BufferedReader(new InputStreamReader(file, "UTF-8"));
+    try {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        line = line.trim();
+        if (line.length() < 1 || line.charAt(0) == '#') { // ignore empty lines and comments
+          continue;
+        }
+        words.add(line);
+      }
+    } finally {
+      // closing the outermost reader also closes the wrapped reader and stream
+      reader.close();
+    }
+    return words;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ca/CatalanSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ca/CatalanSynthesizer.java
new file mode 100644
index 0000000..c350f1b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ca/CatalanSynthesizer.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.ca;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.BaseSynthesizer;
+
+/**
+ * Catalan word form synthesizer. <br/>
+ * Configures {@link BaseSynthesizer} with the Catalan dictionary and tag
+ * list; all synthesis logic is inherited.
+ *
+ * @author Marcin Miłkowski
+ */
+public class CatalanSynthesizer extends BaseSynthesizer {
+
+  /** Tag list, relative to the resource directory. */
+  private static final String TAGS_FILE_NAME = "/ca/catalan_tags.txt";
+
+  /** Synthesis dictionary, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/ca/catalan_synth.dict";
+
+  /**
+   * Creates the synthesizer; both file names are prefixed with the resource
+   * directory reported by the data broker.
+   */
+  public CatalanSynthesizer() {
+    super(JLanguageTool.getDataBroker().getResourceDir() + RESOURCE_FILENAME,
+        JLanguageTool.getDataBroker().getResourceDir() + TAGS_FILE_NAME);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizer.java
new file mode 100644
index 0000000..12935bf
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/en/EnglishSynthesizer.java
@@ -0,0 +1,99 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.en;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.rules.en.AvsAnRule;
+import de.danielnaber.languagetool.synthesis.BaseSynthesizer;
+
+/**
+ * English word form synthesizer. <br/>
+ * Based on part-of-speech lists in Public Domain. See readme.txt for details,
+ * the POS tagset is described in tagset.txt.
+ *
+ * There are two special additions:
+ * <ol>
+ * <li>+DT - tag that adds "a" or "an" (according to the way the word is
+ * pronounced) and "the"</li>
+ * <li>+INDT - a tag that adds only "a" or "an"</li>
+ * </ol>
+ *
+ * @author Marcin Miłkowski
+ */
+public class EnglishSynthesizer extends BaseSynthesizer {
+
+  private static final String RESOURCE_FILENAME = "/en/english_synth.dict";
+
+  private static final String TAGS_FILE_NAME = "/en/english_tags.txt";
+
+  /** A special tag to add determiners. **/
+  private static final String ADD_DETERMINER = "+DT";
+
+  /** A special tag to add only indefinite articles. **/
+  private static final String ADD_IND_DETERMINER = "+INDT";
+
+  /**
+   * Creates the synthesizer; both file names are prefixed with the resource
+   * directory reported by the data broker.
+   */
+  public EnglishSynthesizer() {
+    super(JLanguageTool.getDataBroker().getResourceDir() + RESOURCE_FILENAME,
+        JLanguageTool.getDataBroker().getResourceDir() + TAGS_FILE_NAME);
+  }
+
+  /**
+   * Get the forms of a given AnalyzedToken, where the form is defined by a
+   * part-of-speech tag.
+   * <p>
+   * For the special tags the token text is used: <code>+INDT</code> yields the
+   * suggestion of <code>AvsAnRule.suggestAorAn()</code>, <code>+DT</code>
+   * yields that suggestion followed by the token prefixed with "the ". Any
+   * other tag is looked up in the dictionary as <code>lemma|posTag</code>.
+   *
+   * @param token
+   *          AnalyzedToken to be inflected.
+   * @param posTag
+   *          A desired part-of-speech tag.
+   * @return the inflected word forms.
+   * @throws IOException declared for loading the dictionary on first use
+   */
+  public String[] synthesize(final AnalyzedToken token, final String posTag)
+      throws IOException {
+    final boolean allDeterminers = ADD_DETERMINER.equals(posTag);
+    if (allDeterminers || ADD_IND_DETERMINER.equals(posTag)) {
+      final String indefinite = new AvsAnRule(null).suggestAorAn(token.getToken());
+      if (allDeterminers) {
+        return new String[] { indefinite, "the " + token.getToken() };
+      }
+      return new String[] { indefinite };
+    }
+    if (synthesizer == null) {
+      // the dictionary is loaded once, on the first regular lookup
+      final URL dictionaryUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(RESOURCE_FILENAME);
+      synthesizer = new DictionaryLookup(Dictionary.read(dictionaryUrl));
+    }
+    final List<String> forms = new ArrayList<String>();
+    for (final WordData entry : synthesizer.lookup(token.getLemma() + "|" + posTag)) {
+      forms.add(entry.getStem().toString());
+    }
+    return forms.toArray(new String[forms.size()]);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizer.java
new file mode 100644
index 0000000..48ffe93
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/es/SpanishSynthesizer.java
@@ -0,0 +1,44 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.es;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.BaseSynthesizer;
+
+/**
+ * Spanish word form synthesizer. <br/>
+ * Configures {@link BaseSynthesizer} with the Spanish dictionary and tag
+ * list; all synthesis logic is inherited.
+ *
+ * Based on the Dutch word form synthesizer.
+ *
+ * @author Juan Martorell
+ */
+public class SpanishSynthesizer extends BaseSynthesizer {
+
+  /** Tag list, relative to the resource directory. */
+  private static final String TAGS_FILE_NAME = "/es/spanish_tags.txt";
+
+  /** Synthesis dictionary, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/es/spanish_synth.dict";
+
+  /**
+   * Creates the synthesizer; both file names are prefixed with the resource
+   * directory reported by the data broker.
+   */
+  public SpanishSynthesizer() {
+    super(JLanguageTool.getDataBroker().getResourceDir() + RESOURCE_FILENAME,
+        JLanguageTool.getDataBroker().getResourceDir() + TAGS_FILE_NAME);
+  }
+
+}
\ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizer.java
new file mode 100644
index 0000000..8c85755
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/nl/DutchSynthesizer.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.nl;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.BaseSynthesizer;
+
+/**
+ * Dutch word form synthesizer. <br/>
+ * Configures {@link BaseSynthesizer} with the Dutch dictionary and tag
+ * list; all synthesis logic is inherited.
+ *
+ * @author Marcin Miłkowski
+ */
+public class DutchSynthesizer extends BaseSynthesizer {
+
+  /** Tag list, relative to the resource directory. */
+  private static final String TAGS_FILE_NAME = "/nl/dutch_tags.txt";
+
+  /** Synthesis dictionary, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/nl/dutch_synth.dict";
+
+  /**
+   * Creates the synthesizer; both file names are prefixed with the resource
+   * directory reported by the data broker.
+   */
+  public DutchSynthesizer() {
+    super(JLanguageTool.getDataBroker().getResourceDir() + RESOURCE_FILENAME,
+        JLanguageTool.getDataBroker().getResourceDir() + TAGS_FILE_NAME);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizer.java
new file mode 100644
index 0000000..e86312c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/pl/PolishSynthesizer.java
@@ -0,0 +1,171 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.pl;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.Synthesizer;
+import de.danielnaber.languagetool.synthesis.SynthesizerTools;
+
+/**
+ * Polish word form synthesizer. Based on project Morfologik.
+ *
+ * @author Marcin Milkowski
+ */
+
+public class PolishSynthesizer implements Synthesizer {
+
+  private static final String RESOURCE_FILENAME = "/pl/polish_synth.dict";
+
+  private static final String TAGS_FILE_NAME = "/pl/polish_tags.txt";
+
+  private static final String POTENTIAL_NEGATION_TAG = ":aff";
+  private static final String NEGATION_TAG = ":neg";
+  private static final String COMP_TAG = "comp";
+  private static final String SUP_TAG = "sup";
+
+  /** Matches a colon-separated tag part that contains a dot between two lowercase letters. */
+  private static final Pattern DOTTED_TAG_PART = Pattern.compile(".*[a-z]\\.[a-z].*");
+
+  /** Dictionary lookup, created on first use by {@link #initSynthesizer()}. */
+  private IStemmer synthesizer;
+
+  /** Known tags, loaded on first regular-expression synthesis. */
+  private ArrayList<String> possibleTags;
+
+  /**
+   * Generates the forms of the token's lemma for the given tag.
+   *
+   * <p>A tag that contains <code>+</code> (not at index 0) is treated as a
+   * set of alternatives and delegated to
+   * {@link #synthesize(AnalyzedToken, String, boolean)} with regular
+   * expression matching switched on.
+   *
+   * @param token the token whose lemma is inflected
+   * @param posTag the requested tag
+   * @return the generated forms (possibly empty), or <code>null</code> if
+   *         <code>posTag</code> is <code>null</code>
+   * @throws IOException if the dictionary or the tag list cannot be read
+   */
+  public final String[] synthesize(final AnalyzedToken token,
+      final String posTag) throws IOException {
+    if (posTag == null) {
+      return null;
+    }
+    initSynthesizer();
+    if (posTag.indexOf('+') > 0) {
+      return synthesize(token, posTag, true);
+    }
+    final List<String> forms = getWordForms(token, posTag, isNegated(token, posTag));
+    return forms.toArray(new String[forms.size()]);
+  }
+
+  /**
+   * Generates the forms of the token's lemma for every known tag that matches
+   * the given tag pattern.
+   *
+   * <p>Before the pattern is compiled, each <code>+</code> is turned into the
+   * alternation <code>|</code> and each <code>m[1-3]</code> is made optional.
+   * For negated forms the negation tag is additionally replaced, so that the
+   * affirmative entries of the tag list match.
+   *
+   * @param token the token whose lemma is inflected
+   * @param pos the requested tag, or tag pattern if <code>posTagRegExp</code> is set
+   * @param posTagRegExp whether <code>pos</code> is matched against the tag
+   *        list as a regular expression; if <code>false</code> this behaves
+   *        like {@link #synthesize(AnalyzedToken, String)}
+   * @return the generated forms (possibly empty), or <code>null</code> if
+   *         <code>pos</code> is <code>null</code>
+   * @throws IOException if the dictionary or the tag list cannot be read
+   */
+  public final String[] synthesize(final AnalyzedToken token, final String pos,
+      final boolean posTagRegExp) throws IOException {
+    if (pos == null) {
+      return null;
+    }
+    if (!posTagRegExp) {
+      return synthesize(token, pos);
+    }
+    if (possibleTags == null) {
+      possibleTags = SynthesizerTools.loadWords(JLanguageTool.getDataBroker().
+          getFromResourceDirAsStream(TAGS_FILE_NAME));
+    }
+    initSynthesizer();
+
+    final boolean negated = isNegated(token, pos);
+    String tagPattern = pos;
+    if (negated) {
+      tagPattern = tagPattern.replaceAll(NEGATION_TAG, POTENTIAL_NEGATION_TAG + "?");
+    }
+    final Pattern p = Pattern.compile(tagPattern.replace('+', '|').replaceAll(
+        "m[1-3]", "m[1-3]?"));
+
+    final List<String> results = new ArrayList<String>();
+    for (final String tag : possibleTags) {
+      if (p.matcher(tag).matches()) {
+        // getWordForms never returns null, so its result can be added directly
+        results.addAll(getWordForms(token, tag, negated));
+      }
+    }
+    return results.toArray(new String[results.size()]);
+  }
+
+  /**
+   * Rewrites tag parts that list alternatives separated by a dot into a
+   * regular expression group, e.g. <code>adj:sg:nom.acc:m</code> becomes
+   * <code>adj:sg:(.*nom.*|.*acc.*):m</code>. Tags without such a part are
+   * returned unchanged.
+   *
+   * @param posTag the tag to rewrite
+   * @return the rewritten tag, or <code>posTag</code> itself if nothing changed
+   */
+  public final String getPosTagCorrection(final String posTag) {
+    if (!posTag.contains(".")) {
+      return posTag;
+    }
+    final String[] tags = posTag.split(":");
+    boolean changed = false;
+    for (int i = 0; i < tags.length; i++) {
+      if (DOTTED_TAG_PART.matcher(tags[i]).matches()) {
+        tags[i] = "(.*" + tags[i].replace(".", ".*|.*") + ".*)";
+        changed = true;
+      }
+    }
+    if (!changed) {
+      return posTag;
+    }
+    final StringBuilder sb = new StringBuilder(tags[0]);
+    for (int i = 1; i < tags.length; i++) {
+      sb.append(':').append(tags[i]);
+    }
+    return sb.toString();
+  }
+
+  /** Creates the dictionary lookup on first use; later calls do nothing. */
+  private void initSynthesizer() throws IOException {
+    if (synthesizer == null) {
+      final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(RESOURCE_FILENAME);
+      synthesizer = new DictionaryLookup(Dictionary.read(url));
+    }
+  }
+
+  /**
+   * Tells whether a negated form has to be generated. A token without a POS
+   * tag is never negated. Otherwise the form is negated if the requested tag
+   * contains the negation tag, or if the token's own tag contains it and the
+   * requested tag is neither comparative nor superlative. The parentheses
+   * spell out the grouping the unparenthesized expression already had,
+   * since <code>&amp;&amp;</code> binds tighter than <code>||</code>.
+   */
+  private static boolean isNegated(final AnalyzedToken token, final String posTag) {
+    final String tokenTag = token.getPOSTag();
+    if (tokenTag == null) {
+      return false;
+    }
+    return posTag.indexOf(NEGATION_TAG) > 0
+        || (tokenTag.indexOf(NEGATION_TAG) > 0
+            && !(posTag.indexOf(COMP_TAG) > 0)
+            && !(posTag.indexOf(SUP_TAG) > 0));
+  }
+
+  /**
+   * Looks up the forms of the token's lemma for one concrete tag. Negated
+   * forms are looked up under the affirmative tag and prefixed with "nie".
+   *
+   * @return the forms found, never <code>null</code>
+   */
+  private List<String> getWordForms(final AnalyzedToken token, final String posTag,
+      final boolean isNegated) {
+    final String lookupTag = isNegated
+        ? posTag.replaceFirst(NEGATION_TAG, POTENTIAL_NEGATION_TAG) : posTag;
+    final String prefix = isNegated ? "nie" : "";
+    final List<String> forms = new ArrayList<String>();
+    final List<WordData> wordForms = synthesizer.lookup(token.getLemma() + "|" + lookupTag);
+    // guard both cases alike; previously only the negated lookup was null-checked
+    if (wordForms != null) {
+      for (WordData wd : wordForms) {
+        forms.add(prefix + wd.getStem().toString());
+      }
+    }
+    return forms;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizer.java
new file mode 100644
index 0000000..123bb62
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ro/RomanianSynthesizer.java
@@ -0,0 +1,40 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.ro;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.BaseSynthesizer;
+
+/**
+ * Romanian word form synthesizer. <br/>
+ *
+ * @author Ionuț Păduraru
+ */
+
+public class RomanianSynthesizer extends BaseSynthesizer {
+
+  /** Location of the synthesis dictionary, relative to the resource directory. */
+  private static final String DICT_PATH = "/ro/romanian_synth.dict";
+
+  /** Location of the list of known tags, relative to the resource directory. */
+  private static final String TAGS_PATH = "/ro/romanian_tags.txt";
+
+  /**
+   * Creates the synthesizer, handing the superclass the dictionary path and
+   * the tag list path, each prefixed with the data broker's resource directory.
+   */
+  public RomanianSynthesizer() {
+    super(inResourceDir(DICT_PATH), inResourceDir(TAGS_PATH));
+  }
+
+  /** Prefixes <code>relativePath</code> with the data broker's resource directory. */
+  private static String inResourceDir(final String relativePath) {
+    return JLanguageTool.getDataBroker().getResourceDir() + relativePath;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ru/RussianSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ru/RussianSynthesizer.java
new file mode 100644
index 0000000..7fd404b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/ru/RussianSynthesizer.java
@@ -0,0 +1,44 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.ru;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.BaseSynthesizer;
+
+/**
+ * Russian word form synthesizer. <br/>
+ * @author Yakov Reztsov
+ *
+ * Based on the Dutch word form synthesizer.
+ *
+ * @author Marcin Miłkowski
+ */
+
+public class RussianSynthesizer extends BaseSynthesizer {
+
+  /** Location of the synthesis dictionary, relative to the resource directory. */
+  private static final String DICT_PATH = "/ru/russian_synth.dict";
+
+  /** Location of the list of known tags, relative to the resource directory. */
+  private static final String TAGS_PATH = "/ru/tags_russian.txt";
+
+  /**
+   * Creates the synthesizer, handing the superclass the dictionary path and
+   * the tag list path, each prefixed with the data broker's resource directory.
+   */
+  public RussianSynthesizer() {
+    super(inResourceDir(DICT_PATH), inResourceDir(TAGS_PATH));
+  }
+
+  /** Prefixes <code>relativePath</code> with the data broker's resource directory. */
+  private static String inResourceDir(final String relativePath) {
+    return JLanguageTool.getDataBroker().getResourceDir() + relativePath;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizer.java
new file mode 100644
index 0000000..0c2c018
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/synthesis/sk/SlovakSynthesizer.java
@@ -0,0 +1,40 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.synthesis.sk;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.synthesis.BaseSynthesizer;
+
+/**
+ * Slovak word form synthesizer. <br/>
+ *
+ * @author Marcin Miłkowski
+ */
+
+public class SlovakSynthesizer extends BaseSynthesizer {
+
+  /** Location of the synthesis dictionary, relative to the resource directory. */
+  private static final String DICT_PATH = "/sk/slovak_synth.dict";
+
+  /** Location of the list of known tags, relative to the resource directory. */
+  private static final String TAGS_PATH = "/sk/slovak_tags.txt";
+
+  /**
+   * Creates the synthesizer, handing the superclass the dictionary path and
+   * the tag list path, each prefixed with the data broker's resource directory.
+   */
+  public SlovakSynthesizer() {
+    super(inResourceDir(DICT_PATH), inResourceDir(TAGS_PATH));
+  }
+
+  /** Prefixes <code>relativePath</code> with the data broker's resource directory. */
+  private static String inResourceDir(final String relativePath) {
+    return JLanguageTool.getDataBroker().getResourceDir() + relativePath;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/BaseTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/BaseTagger.java
new file mode 100644
index 0000000..d8399e1
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/BaseTagger.java
@@ -0,0 +1,152 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Base tagger backed by a Morfologik dictionary lookup.
+ *
+ * @author Marcin Milkowski
+ */
+public abstract class BaseTagger implements Tagger {
+
+ private IStemmer morfologik;
+ private Locale conversionLocale = Locale.getDefault();
+
+ /**
+ * Get the filename, e.g., <tt>/resource/fr/french.dict</tt>.
+ **/
+ public abstract String getFileName();
+
+ public void setLocale(Locale loc) {
+ conversionLocale = loc;
+ }
+
+ public List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens)
+ throws IOException {
+ List<AnalyzedToken> taggerTokens;
+ List<AnalyzedToken> lowerTaggerTokens;
+ List<AnalyzedToken> upperTaggerTokens;
+ final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
+ int pos = 0;
+ // caching IStemmer instance - lazy init
+ if (morfologik == null) {
+ final URL url = this.getClass().getResource(getFileName());
+ morfologik = new DictionaryLookup(Dictionary.read(url));
+ }
+
+ for (String word : sentenceTokens) {
+ final List<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
+ final String lowerWord = word.toLowerCase(conversionLocale);
+ taggerTokens = asAnalyzedTokenList(word, morfologik.lookup(word));
+ lowerTaggerTokens = asAnalyzedTokenList(word, morfologik.lookup(lowerWord));
+ final boolean isLowercase = word.equals(lowerWord);
+
+ //normal case
+ addTokens(taggerTokens, l);
+
+ if (!isLowercase) {
+ //lowercase
+ addTokens(lowerTaggerTokens, l);
+ }
+
+ //uppercase
+ if (lowerTaggerTokens.isEmpty() && taggerTokens.isEmpty()) {
+ if (isLowercase) {
+ upperTaggerTokens = asAnalyzedTokenList(word,
+ morfologik.lookup(StringTools
+ .uppercaseFirstChar(word)));
+ if (!upperTaggerTokens.isEmpty()) {
+ addTokens(upperTaggerTokens, l);
+ } else {
+ l.add(new AnalyzedToken(word, null, null));
+ }
+ } else {
+ l.add(new AnalyzedToken(word, null, null));
+ }
+ }
+ tokenReadings.add(new AnalyzedTokenReadings(l, pos));
+ pos += word.length();
+ }
+
+ return tokenReadings;
+
+ }
+
+ protected List<AnalyzedToken> asAnalyzedTokenList(final String word, final List<WordData> wdList) {
+ final List<AnalyzedToken> aTokenList = new ArrayList<AnalyzedToken>();
+ for (WordData wd : wdList) {
+ aTokenList.add(asAnalyzedToken(word, wd));
+ }
+ return aTokenList;
+ }
+ protected AnalyzedToken asAnalyzedToken(final String word, final WordData wd) {
+ return new AnalyzedToken(
+ word,
+ StringTools.asString(wd.getTag()),
+ StringTools.asString(wd.getStem()));
+ }
+
+ private void addTokens(final List<AnalyzedToken> taggedTokens,
+ final List<AnalyzedToken> l) {
+ if (taggedTokens != null) {
+ for (AnalyzedToken at : taggedTokens) {
+ /*
+ if (!StringTools.isEmpty(at.getPOSTag())) {
+ l.add(at);
+ } else {
+ l.add(new AnalyzedToken(at.getToken(), null, null));
+ }
+ */
+ l.add(at);
+ }
+ }
+ }
+
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * de.danielnaber.languagetool.tagging.Tagger#createNullToken(java.lang.String
+ * , int)
+ */
+ public final AnalyzedTokenReadings createNullToken(final String token,
+ final int startPos) {
+ return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
+ }
+
+ public AnalyzedToken createToken(String token, String posTag) {
+ return new AnalyzedToken(token, posTag, null);
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ManualTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ManualTagger.java
new file mode 100644
index 0000000..ae726fa
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ManualTagger.java
@@ -0,0 +1,127 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import de.danielnaber.languagetool.tools.StringTools;
+
+
+
+/**
+ * A tagger that reads the POS information from a plain (UTF-8) text file. This
+ * makes it possible for the user to edit the text file to let the system know
+ * about new words or missing readings in the *.dict file.
+ *
+ * <p>
+ * File Format: <tt>fullform baseform postags</tt> (tab separated)
+ *
+ * @author Daniel Naber
+ */
+public class ManualTagger {
+
+ private final Map<String, List<LookedUpTerm>> mapping;
+
+ public ManualTagger(final InputStream file) throws IOException {
+ mapping = loadMapping(file, "utf8");
+ }
+
+ /**
+ * Look up a word's baseform and POS information.
+ *
+ * @param term
+ * @return an array with the baseform (at position 0, 2, ...) and the POS
+ * information (at position 1, 3, ...) or <code>null</code> if the
+ * word is unknown
+ */
+ public String[] lookup(final String term) {
+ final List<LookedUpTerm> l = mapping.get(term);
+ if (l == null) {
+ return null;
+ }
+ final List<String> plainResult = new ArrayList<String>();
+ for (final Object element : l) {
+ final LookedUpTerm lookedUpTerm = (LookedUpTerm) element;
+ plainResult.add(lookedUpTerm.baseform);
+ plainResult.add(lookedUpTerm.postags);
+ }
+ if (plainResult.isEmpty()) {
+ return null;
+ }
+ return plainResult.toArray(new String[]{});
+ }
+
+ private Map<String, List<LookedUpTerm>> loadMapping(final InputStream file,
+ final String encoding) throws IOException {
+ final Map<String, List<LookedUpTerm>> map = new HashMap<String, List<LookedUpTerm>>();
+ InputStreamReader isr = null;
+ BufferedReader br = null;
+ try {
+ isr = new InputStreamReader(file, encoding);
+ br = new BufferedReader(isr);
+ String line;
+ while ((line = br.readLine()) != null) {
+ if (StringTools.isEmpty(line) || line.charAt(0)=='#') {
+ continue;
+ }
+ final String[] parts = line.split("\t");
+ if (parts.length != 3) {
+ throw new IOException("Unknown format in " + file + ": " + line);
+ }
+ if (map.containsKey(parts[0])) {
+ final List<LookedUpTerm> l = map.get(parts[0]);
+ l.add(new LookedUpTerm(parts[1], parts[2]));
+ map.put(parts[0], l);
+ } else {
+ final List<LookedUpTerm> l = new ArrayList<LookedUpTerm>();
+ l.add(new LookedUpTerm(parts[1], parts[2]));
+ map.put(parts[0], l);
+ }
+ }
+ } finally {
+ if (br != null) {
+ br.close();
+ }
+ if (isr != null) {
+ isr.close();
+ }
+ }
+ return map;
+ }
+
+}
+
+class LookedUpTerm {
+
+ String baseform;
+ String postags;
+
+ LookedUpTerm(final String baseform, final String postags) {
+ this.baseform = baseform;
+ this.postags = postags;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/Tagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/Tagger.java
new file mode 100644
index 0000000..64a1dbd
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/Tagger.java
@@ -0,0 +1,57 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging;
+
+import java.io.IOException;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+
+/**
+ * The part-of-speech tagger interface, whose implementations are usually language-dependent.
+ *
+ * @author Daniel Naber
+ */
+public interface Tagger {
+
+  /**
+   * Assigns part-of-speech information to the terms of exactly one sentence.
+   * A term is not necessarily given just one tag.
+   *
+   * <p>Because the input is a single sentence, an implementation may
+   * special-case its first word, which is usually written with an uppercase
+   * letter.
+   *
+   * @param sentenceTokens the text as returned by a WordTokenizer but without whitespace tokens.
+   * @return the readings for the given tokens
+   * @throws IOException if the implementation fails to read the data it needs
+   */
+  List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException;
+
+  /**
+   * Create the readings object used for whitespace and other non-words. Use
+   * <code>null</code> as the POS tag for this token.
+   *
+   * @param token the text of the token
+   * @param startPos the start position to store with the token
+   */
+  AnalyzedTokenReadings createNullToken(String token, int startPos);
+
+  /**
+   * Create a token specific to the language of the implementing class.
+   *
+   * @param token the text of the token
+   * @param posTag the POS tag to assign
+   */
+  AnalyzedToken createToken(String token, String posTag);
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/be/BelarusianTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/be/BelarusianTagger.java
new file mode 100644
index 0000000..8e10360
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/be/BelarusianTagger.java
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.be;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tagging.Tagger;
+
+/**
+ * Belarusian Tagger.
+ *
+ * Copyright (C) 2010 Alex Buloichik (alex73mail@gmail.com)
+ */
+public class BelarusianTagger implements Tagger {
+
+  /**
+   * Wraps every token in a reading without POS tag and lemma; no real
+   * tagging is done.
+   *
+   * <p>The start position given to each result is the sum of the lengths of
+   * the preceding tokens. Previously that offset was computed but a constant
+   * 0 was passed instead.
+   *
+   * @param sentenceTokens the tokens of one sentence
+   * @return one {@link AnalyzedTokenReadings} per token, in input order
+   */
+  public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) {
+    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
+    int pos = 0;
+    for (String word : sentenceTokens) {
+      final List<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
+      // a real tagger would need to assign a POS tag
+      // in the next line instead of null:
+      l.add(new AnalyzedToken(word, null, null));
+      // use the offset of this token, then advance past it
+      tokenReadings.add(new AnalyzedTokenReadings(l
+          .toArray(new AnalyzedToken[0]), pos));
+      pos += word.length();
+    }
+    return tokenReadings;
+  }
+
+  /** Creates readings holding a single token without POS tag and lemma. */
+  public AnalyzedTokenReadings createNullToken(String token, int startPos) {
+    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null),
+        startPos);
+  }
+
+  /** Creates a token with the given POS tag and no lemma. */
+  public AnalyzedToken createToken(String token, String posTag) {
+    return new AnalyzedToken(token, posTag, null);
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ca/CatalanTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ca/CatalanTagger.java
new file mode 100644
index 0000000..b0c266a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ca/CatalanTagger.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ca;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/** Catalan Tagger
+ *
+ * Based on FreeLing tagger dictionary
+ *
+ * @author Marcin Milkowski
+ */
+public class CatalanTagger extends BaseTagger {
+
+  /** Location of the tagger dictionary, relative to the resource directory. */
+  private static final String DICT_PATH = "/ca/catalan.dict";
+
+  /** Creates the tagger and sets the Catalan locale ("ca"). */
+  public CatalanTagger() {
+    setLocale(new Locale("ca"));
+  }
+
+  /** Returns the dictionary path, prefixed with the data broker's resource directory. */
+  public final String getFileName() {
+    return JLanguageTool.getDataBroker().getResourceDir() + DICT_PATH;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/cs/CzechTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/cs/CzechTagger.java
new file mode 100644
index 0000000..b33fd00
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/cs/CzechTagger.java
@@ -0,0 +1,115 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.cs;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Czech POS tagger based on FSA morphological dictionaries.
+ *
+ * @author Jozef Licko
+ */
+public class CzechTagger extends BaseTagger {
+
+ private static final String RESOURCE_FILENAME = "/cs/czech.dict";
+
+ private IStemmer morfologik;
+ private final Locale csLocale = new Locale("cs");
+
+ public final String getFileName() {
+ return JLanguageTool.getDataBroker().getResourceDir() + RESOURCE_FILENAME;
+ }
+
+ @Override
+ public final List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens)
+ throws IOException {
+ List<AnalyzedToken> taggerTokens;
+ List<AnalyzedToken> lowerTaggerTokens;
+ List<AnalyzedToken> upperTaggerTokens;
+ final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
+ int pos = 0;
+ // caching Lametyzator instance - lazy init
+ if (morfologik == null) {
+ final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(RESOURCE_FILENAME);
+ morfologik = new DictionaryLookup(Dictionary.read(url));
+ }
+
+ for (String word : sentenceTokens) {
+ final List<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
+ final String lowerWord = word.toLowerCase(csLocale);
+ taggerTokens = asAnalyzedTokenList(word, morfologik.lookup(word));
+ lowerTaggerTokens = asAnalyzedTokenList(word, morfologik.lookup(lowerWord));
+ final boolean isLowercase = word.equals(lowerWord);
+
+ //normal case
+ addTokens(taggerTokens, l);
+
+ if (!isLowercase) {
+ //lowercase
+ addTokens(lowerTaggerTokens, l);
+ }
+
+ //uppercase
+ if (lowerTaggerTokens.isEmpty() && taggerTokens.isEmpty()) {
+ if (isLowercase) {
+ upperTaggerTokens = asAnalyzedTokenList(word, morfologik.lookup(StringTools
+ .uppercaseFirstChar(word)));
+ if (!upperTaggerTokens.isEmpty()) {
+ addTokens(upperTaggerTokens, l);
+ } else {
+ l.add(new AnalyzedToken(word, null, null));
+ }
+ } else {
+ l.add(new AnalyzedToken(word, null, null));
+ }
+ }
+ tokenReadings.add(new AnalyzedTokenReadings(l, pos));
+ pos += word.length();
+ }
+
+ return tokenReadings;
+ }
+
+ private void addTokens(final List<AnalyzedToken> taggedTokens,
+ final List<AnalyzedToken> l) {
+ if (taggedTokens != null) {
+ for (AnalyzedToken at : taggedTokens) {
+ final String[] tagsArr = StringTools.asString(at.getPOSTag()).split("\\+");
+ for (final String currTag : tagsArr) {
+ l.add(new AnalyzedToken(at.getToken(), currTag,
+ at.getLemma()));
+ }
+ }
+ }
+ }
+
+} \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/da/DanishTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/da/DanishTagger.java
new file mode 100644
index 0000000..e383272
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/da/DanishTagger.java
@@ -0,0 +1,50 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.da;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Danish Part-of-speech tagger.
+ *
+ * Based on the spellcheck dictionary Stavekontrolden
+ * published by Foreningen for frit tilgængelige sprogværktøjer
+ * under the terms of the GNU LGPL version 2.1 and Mozilla MPL version 1.1.
+ *
+ * www.stavekontrolden.dk
+ *
+ * Stavekontrolden is based on data from Det Danske Sprog- og Litteraturselskab
+ * (The Danish Society for Language and Literature), http://www.dsl.dk.
+ *
+ * @author Esben Aaberg
+ */
+ public class DanishTagger extends BaseTagger {
+
+   /** Path of the Danish dictionary below the resource directory. */
+   private static final String DICT_PATH = "/da/danish.dict";
+
+   /**
+    * Creates the tagger and sets its locale to Danish ("da").
+    */
+   public DanishTagger() {
+     setLocale(new Locale("da"));
+   }
+
+   /**
+    * @return the resource directory reported by the data broker, followed by
+    *         the path of the Danish dictionary file
+    */
+   public final String getFileName() {
+     final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+     return resourceDir + DICT_PATH;
+   }
+ }
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanToken.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanToken.java
new file mode 100644
index 0000000..dcd5bc8
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanToken.java
@@ -0,0 +1,136 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.de;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.tagging.de.GermanToken.Genus;
+import de.danielnaber.languagetool.tagging.de.GermanToken.Kasus;
+import de.danielnaber.languagetool.tagging.de.GermanToken.Numerus;
+import de.danielnaber.languagetool.tagging.de.GermanToken.POSType;
+
+/**
+ * One reading of a German word. Many words can have more
+ * than one reading, e.g. "Tische" can be both Nominativ Plural
+ * and Genitiv Plural (among other readings).
+ *
+ * @author Daniel Naber
+ */
+ public class AnalyzedGermanToken extends AnalyzedToken {
+
+   // All four properties stay null when the POS tag is missing or not interpreted.
+   private POSType type;
+   private Kasus casus;
+   private Numerus numerus;
+   private Genus genus;
+
+   /**
+    * Creates a reading without a lemma.
+    */
+   public AnalyzedGermanToken(String token, String posTag) {
+     this(token, posTag, null);
+   }
+
+   /**
+    * Creates a reading and derives type, case, number and gender from the
+    * colon-separated POS tag.
+    */
+   public AnalyzedGermanToken(String token, String posTag, String lemma) {
+     super(token, posTag, lemma);
+     initFromPOSTagString(posTag);
+   }
+
+   /**
+    * Splits the tag at ':' and maps every known part to the matching property.
+    * Tags that are <code>null</code> or have fewer than three parts are not
+    * interpreted at all (the original author marked the latter case as FIXME).
+    */
+   private void initFromPOSTagString(String posTagString) {
+     if (posTagString == null) {
+       return;
+     }
+     final String[] parts = posTagString.split(":");
+     if (parts.length < 3) {
+       return;
+     }
+     for (final String part : parts) {
+       // Part of speech: proper noun and participle always overwrite the type,
+       // the remaining types are only taken while no type has been set yet.
+       if ("EIG".equals(part)) {
+         type = POSType.PROPER_NOUN;
+       } else if ("PA1".equals(part) || "PA2".equals(part)) {
+         type = POSType.PARTIZIP;
+       } else if (type == null && "SUB".equals(part)) {
+         type = POSType.NOMEN;
+       } else if (type == null && "VER".equals(part)) {
+         type = POSType.VERB;
+       } else if (type == null && "ADJ".equals(part)) {
+         type = POSType.ADJEKTIV;
+       } else if (type == null && "PRO".equals(part)) {
+         type = POSType.PRONOMEN;
+       } else if (type == null && "ART".equals(part)) {
+         type = POSType.DETERMINER;
+       // Case
+       } else if ("AKK".equals(part)) {
+         casus = Kasus.AKKUSATIV;
+       } else if ("GEN".equals(part)) {
+         casus = Kasus.GENITIV;
+       } else if ("NOM".equals(part)) {
+         casus = Kasus.NOMINATIV;
+       } else if ("DAT".equals(part)) {
+         casus = Kasus.DATIV;
+       // Number
+       } else if ("PLU".equals(part)) {
+         numerus = Numerus.PLURAL;
+       } else if ("SIN".equals(part)) {
+         numerus = Numerus.SINGULAR;
+       // Gender
+       } else if ("MAS".equals(part)) {
+         genus = Genus.MASKULINUM;
+       } else if ("FEM".equals(part)) {
+         genus = Genus.FEMININUM;
+       } else if ("NEU".equals(part)) {
+         genus = Genus.NEUTRUM;
+       } else if ("NOG".equals(part)) {
+         // NOG = no genus because the word is only used as plural
+         genus = Genus.FEMININUM;
+       }
+       // "DEF", "DEM" and "PER" are known parts that are not used yet; they
+       // and all unknown parts are silently ignored.
+       // TODO: throw an exception for unknown parts?
+     }
+   }
+
+   public POSType getType() {
+     return type;
+   }
+
+   public Kasus getCasus() {
+     return casus;
+   }
+
+   public Numerus getNumerus() {
+     return numerus;
+   }
+
+   public Genus getGenus() {
+     return genus;
+   }
+
+   /**
+    * @return the unmodified POS tag of this reading
+    */
+   public String toString() {
+     return getPOSTag();
+   }
+
+ }
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanTokenReadings.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanTokenReadings.java
new file mode 100644
index 0000000..81e5895
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/AnalyzedGermanTokenReadings.java
@@ -0,0 +1,172 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.de;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tagging.de.GermanToken.POSType;
+import de.danielnaber.languagetool.tools.StringTools;
+import de.danielnaber.languagetool.JLanguageTool;
+
+/**
+ * All possible readings of an analyzed German word.
+ *
+ * @author Daniel Naber
+ */
+public class AnalyzedGermanTokenReadings extends AnalyzedTokenReadings {
+
+ public AnalyzedGermanTokenReadings(AnalyzedGermanToken[] aTokens, final int startPos) {
+ super(aTokens, startPos);
+ }
+
+ public AnalyzedGermanTokenReadings(AnalyzedGermanToken aToken, final int startPos) {
+ super(aToken, startPos);
+ }
+
+ /**
+ * @return a list of {@link AnalyzedGermanToken}s.
+ */
+ public List<AnalyzedGermanToken> getGermanReadings() {
+ final List<AnalyzedGermanToken> tokens = new ArrayList<AnalyzedGermanToken>();
+ for (AnalyzedToken reading : anTokReadings) {
+ if (reading.getPOSTag() != null) {
+ if (!reading.getPOSTag().equals(JLanguageTool.SENTENCE_END_TAGNAME) && !reading.getPOSTag().equals(JLanguageTool.PARAGRAPH_END_TAGNAME)) {
+ tokens.add((AnalyzedGermanToken)reading);
+ }
+ } else {
+ tokens.add((AnalyzedGermanToken)reading);
+ }
+
+ }
+ return tokens;
+ }
+
+ public boolean hasReadingOfType(POSType type) {
+ if (anTokReadings == null)
+ return false;
+ for (AnalyzedToken reading : anTokReadings) {
+ if (reading.getPOSTag() != null) {
+ if (reading.getPOSTag().equals(JLanguageTool.SENTENCE_END_TAGNAME) || reading.getPOSTag().equals(JLanguageTool.PARAGRAPH_END_TAGNAME)) {
+ return false;
+ }
+ }
+ final AnalyzedGermanToken germanReading = (AnalyzedGermanToken) reading;
+ if (germanReading.getType() == type)
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Return true if the analyzed word is a sentence or paragraph end.
+ */
+ public boolean isSentenceEnd() {
+ if (anTokReadings == null) {
+ return false;
+ }
+ for (AnalyzedToken reading : anTokReadings) {
+ if (reading.getPOSTag() != null) {
+ if (reading.getPOSTag().equals(JLanguageTool.SENTENCE_END_TAGNAME) || reading.getPOSTag().equals(JLanguageTool.PARAGRAPH_END_TAGNAME)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ public boolean hasReading(GermanToken.Kasus kasus) {
+ if (anTokReadings == null)
+ return false;
+ for (AnalyzedToken reading : anTokReadings) {
+ final AnalyzedGermanToken germanReading = (AnalyzedGermanToken) reading;
+ if (germanReading.getCasus() == kasus)
+ return true;
+ }
+ return false;
+ }
+
+ public boolean hasReading(GermanToken.Numerus numerus) {
+ if (anTokReadings == null)
+ return false;
+ for (AnalyzedToken reading : anTokReadings) {
+ final AnalyzedGermanToken germanReading = (AnalyzedGermanToken) reading;
+ if (germanReading.getNumerus() == numerus)
+ return true;
+ }
+ return false;
+ }
+
+ public boolean hasReading(GermanToken.Genus genus) {
+ if (anTokReadings == null)
+ return false;
+ for (AnalyzedToken reading : anTokReadings) {
+ final AnalyzedGermanToken germanReading = (AnalyzedGermanToken) reading;
+ if (germanReading.getGenus() == genus)
+ return true;
+ }
+ return false;
+ }
+
+ public String toString() {
+ if (anTokReadings == null) {
+ return super.getAnalyzedToken(0).getToken() + "[?]";
+ }
+ final StringBuilder sb = new StringBuilder(super.getAnalyzedToken(0).getToken());
+ final Set<String> printed = new HashSet<String>();
+ sb.append('[');
+ for (AnalyzedToken reading : anTokReadings) {
+ if (!printed.contains(reading.toString())) {
+ if (printed.size() > 0)
+ sb.append(", ");
+ sb.append(reading.toString());
+ }
+ printed.add(reading.toString());
+ }
+ sb.append(']');
+ return sb.toString();
+ }
+
+ /**
+ * Returns a string representation like {@code toString()}, but sorts
+ * the elements alphabetically.
+ */
+ public String toSortedString() {
+ if (anTokReadings == null) {
+ return super.getAnalyzedToken(0).getToken() + "[?]";
+ }
+ final StringBuilder sb = new StringBuilder(super.getAnalyzedToken(0).getToken());
+ final Set<String> elements = new TreeSet<String>();
+ sb.append('[');
+ for (AnalyzedToken reading : anTokReadings) {
+ if (!elements.contains(reading.toString())) {
+ elements.add(reading.toString());
+ }
+ }
+ sb.append(StringTools.listToString(elements, ", "));
+ sb.append(']');
+ return sb.toString();
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanTagger.java
new file mode 100644
index 0000000..b8adf5b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanTagger.java
@@ -0,0 +1,201 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.de;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.ManualTagger;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tokenizers.de.GermanCompoundTokenizer;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * German tagger, requires data file in <code>resource/de/german.dict</code>.
+ *
+ * @author Marcin Milkowski, Daniel Naber
+ */
+ public class GermanTagger implements Tagger {
+
+   private static final String DICT_FILENAME = "/de/german.dict";
+   private static final String USER_DICT_FILENAME = "/de/added.txt";
+
+   // Explicit locale for case conversion, so that lowercasing does not depend
+   // on the default locale of the JVM (e.g. Turkish dotless i).
+   private static final java.util.Locale DE_LOCALE = java.util.Locale.GERMAN;
+
+   // Shared by all instances, created on first use by lazyInit().
+   private static IStemmer morfologik;
+   private static ManualTagger manualTagger;
+   private static GermanCompoundTokenizer compoundTokenizer;
+
+   public GermanTagger() {
+   }
+
+   /**
+    * Tags a single word without the lowercase fallback.
+    *
+    * @return the readings of the word or <code>null</code> if its first
+    *         reading has no POS tag
+    */
+   public AnalyzedGermanTokenReadings lookup(final String word) throws IOException {
+     final List<String> words = new ArrayList<String>();
+     words.add(word);
+     final List<AnalyzedTokenReadings> result = tag(words, false);
+     final AnalyzedGermanTokenReadings atr = (AnalyzedGermanTokenReadings) result.get(0);
+     if (atr.getAnalyzedToken(0).getPOSTag() == null) {
+       return null;
+     }
+     return atr;
+   }
+
+   /**
+    * Tags the tokens of a sentence with the lowercase fallback enabled.
+    */
+   public List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens) throws IOException {
+     return tag(sentenceTokens, true);
+   }
+
+   /**
+    * Tags the tokens of a sentence. Words that are in neither dictionary are
+    * decomposed and tagged by their last compound part; words that still
+    * cannot be tagged get a single reading without POS tag and lemma.
+    *
+    * @param sentenceTokens the tokens to tag
+    * @param ignoreCase whether an unknown word may be looked up again in
+    *        lowercase (e.g. "Das" -> "das" at the start of a sentence)
+    * @return one {@link AnalyzedGermanTokenReadings} per input token, with
+    *         start positions counted from the lengths of the preceding tokens
+    */
+   public List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens, final boolean ignoreCase) throws IOException {
+     lazyInit();
+     final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
+     boolean firstWord = true;
+     int pos = 0;
+     for (String word : sentenceTokens) {
+       final List<AnalyzedGermanToken> l = new ArrayList<AnalyzedGermanToken>();
+       String[] taggerTokens = lexiconLookup(word);
+       // NOTE(review): firstWord is only cleared once this fallback has been
+       // used, so it also applies to a later unknown word if all words before
+       // it were known. Kept as is; confirm whether that is intended.
+       if (firstWord && taggerTokens == null && ignoreCase) {
+         taggerTokens = lexiconLookup(word.toLowerCase(DE_LOCALE));
+         firstWord = false;
+       }
+       if (taggerTokens != null) {
+         tagWord(taggerTokens, word, l);
+       } else {
+         tagUnknownWord(word, l);
+       }
+       tokenReadings.add(new AnalyzedGermanTokenReadings(l.toArray(new AnalyzedGermanToken[l.size()]), pos));
+       pos += word.length();
+     }
+     return tokenReadings;
+   }
+
+   /**
+    * Creates the shared dictionary, the user dictionary tagger and the
+    * compound tokenizer if they do not exist yet.
+    */
+   private static void lazyInit() throws IOException {
+     if (morfologik == null) {
+       final URL url = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(DICT_FILENAME);
+       morfologik = new DictionaryLookup(Dictionary.read(url));
+     }
+     if (manualTagger == null) {
+       manualTagger = new ManualTagger(JLanguageTool.getDataBroker().getFromResourceDirAsStream(USER_DICT_FILENAME));
+     }
+     if (compoundTokenizer == null) {
+       compoundTokenizer = new GermanCompoundTokenizer();
+     }
+   }
+
+   /**
+    * Handles a word that is in neither dictionary: tries to decompose it and
+    * uses the last part for POS tagging. Adds a reading without POS tag and
+    * lemma if that is not possible.
+    */
+   private void tagUnknownWord(final String word, final List<AnalyzedGermanToken> l) {
+     if (StringTools.isEmpty(word.trim())) {
+       l.add(new AnalyzedGermanToken(word, null, null));
+       return;
+     }
+     final List<String> compoundParts = compoundTokenizer.tokenize(word);
+     if (compoundParts.size() <= 1) {
+       l.add(new AnalyzedGermanToken(word, null, null));
+       return;
+     }
+     // last part governs a word's POS:
+     String lastPart = compoundParts.get(compoundParts.size() - 1);
+     if (StringTools.startsWithUppercase(word)) {
+       lastPart = StringTools.uppercaseFirstChar(lastPart);
+     }
+     final String[] taggerTokens = lexiconLookup(lastPart);
+     if (taggerTokens != null) {
+       tagWord(taggerTokens, word, l, compoundParts);
+     } else {
+       l.add(new AnalyzedGermanToken(word, null, null));
+     }
+   }
+
+   private void tagWord(String[] taggerTokens, String word, List<AnalyzedGermanToken> l) {
+     tagWord(taggerTokens, word, l, null);
+   }
+
+   /**
+    * Adds one reading per (lemma, POS tag) pair of <code>taggerTokens</code>.
+    *
+    * @param taggerTokens flat array of pairs: first the lemma, then its POS tag
+    * @param compoundParts all compound parts of the complete word or <code>null</code>,
+    *        if the original input is not a compound
+    */
+   private void tagWord(String[] taggerTokens, String word, List<AnalyzedGermanToken> l,
+       List<String> compoundParts) {
+     // For a compound the lemma is all parts but the last one, followed by
+     // the lemma of the last part; the prefix is the same for every reading.
+     String lemmaPrefix = null;
+     if (compoundParts != null) {
+       final List<String> allButLastPart = compoundParts.subList(0, compoundParts.size() - 1);
+       lemmaPrefix = StringTools.listToString(allButLastPart, "");
+     }
+     for (int i = 0; i < taggerTokens.length; i += 2) {
+       final String posTag = taggerTokens[i + 1];
+       if (compoundParts != null) {
+         final String lemma = lemmaPrefix + StringTools.lowercaseFirstChar(taggerTokens[i]);
+         l.add(new AnalyzedGermanToken(word, posTag, lemma));
+       } else {
+         l.add(new AnalyzedGermanToken(word, posTag, taggerTokens[i]));
+       }
+     }
+   }
+
+   /**
+    * Looks a word up in the binary dictionary and in the user dictionary.
+    *
+    * @return a flat array with the (lemma, POS tag) pairs from the binary
+    *         dictionary followed by the entries of the user dictionary
+    *         (presumably in the same pair layout, as tagWord() reads them that
+    *         way); if the binary dictionary has no entry, the result of the
+    *         user dictionary is returned unchanged, which may be <code>null</code>
+    */
+   private String[] lexiconLookup(final String word) {
+     final String[] posTagsFromUserDict = manualTagger.lookup(word);
+     final List<WordData> posTagsFromDict = morfologik.lookup(word);
+     if (posTagsFromDict.isEmpty()) {
+       return posTagsFromUserDict;
+     }
+     final int dictLength = posTagsFromDict.size() * 2;
+     final int userLength = posTagsFromUserDict == null ? 0 : posTagsFromUserDict.length;
+     final String[] allPosTags = new String[dictLength + userLength];
+     int i = 0;
+     for (WordData wd : posTagsFromDict) {
+       allPosTags[i] = wd.getStem().toString();
+       allPosTags[i + 1] = wd.getTag().toString();
+       i = i + 2;
+     }
+     if (posTagsFromUserDict != null) {
+       System.arraycopy(posTagsFromUserDict, 0, allPosTags, dictLength, userLength);
+     }
+     return allPosTags;
+   }
+
+   public final AnalyzedGermanTokenReadings createNullToken(final String token, final int startPos) {
+     return new AnalyzedGermanTokenReadings(new AnalyzedGermanToken(token, null, null), startPos);
+   }
+
+   public AnalyzedToken createToken(String token, String posTag) {
+     return new AnalyzedGermanToken(token, posTag);
+   }
+
+   /**
+    * Test only
+    */
+   public static void main(final String[] args) throws IOException {
+     final GermanTagger gt = new GermanTagger();
+     final List<String> l = new ArrayList<String>();
+     l.add("Einfacher");
+     final List<AnalyzedTokenReadings> res = gt.tag(l);
+     System.err.println(res);
+   }
+
+ }
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanToken.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanToken.java
new file mode 100644
index 0000000..d6011a0
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/de/GermanToken.java
@@ -0,0 +1,104 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.de;
+
+/**
+ * Constants used to describe the properties of German tokens.
+ *
+ * @author Daniel Naber
+ */
+public final class GermanToken {
+
+ private GermanToken() {
+ // only static stuff
+ }
+
+ public static final class POSType {
+ public static final POSType NOMEN = new POSType("Nomen");
+ public static final POSType VERB = new POSType("Verb");
+ public static final POSType ADJEKTIV = new POSType("Adjektiv");
+ public static final POSType DETERMINER = new POSType("Determiner");
+ public static final POSType PRONOMEN = new POSType("Pronomen");
+ public static final POSType PARTIZIP = new POSType("Partizip");
+ public static final POSType PROPER_NOUN = new POSType("Eigenname");
+ public static final POSType OTHER = new POSType("Other"); // e.g. sentence start
+
+ private final String name;
+
+ private POSType(final String name) {
+ this.name = name;
+ }
+
+ public String toString() {
+ return name;
+ }
+ }
+
+ public static final class Kasus {
+ public static final Kasus NOMINATIV = new Kasus("Nominativ");
+ public static final Kasus AKKUSATIV = new Kasus("Akkusativ");
+ public static final Kasus DATIV = new Kasus("Dativ");
+ public static final Kasus GENITIV = new Kasus("Genitiv");
+ public static final Kasus OTHER = new Kasus("Other");
+
+ private final String name;
+
+ private Kasus(final String name) {
+ this.name = name;
+ }
+
+ public String toString() {
+ return name;
+ }
+ }
+
+ public static final class Numerus {
+ public static final Numerus SINGULAR = new Numerus("Singular");
+ public static final Numerus PLURAL = new Numerus("Plural");
+ public static final Numerus OTHER = new Numerus("Other");
+
+ private final String name;
+
+ private Numerus(final String name) {
+ this.name = name;
+ }
+
+ public String toString() {
+ return name;
+ }
+ }
+
+ public static final class Genus {
+ public static final Genus NEUTRUM = new Genus("Neutrum");
+ public static final Genus MASKULINUM = new Genus("Maskulinum");
+ public static final Genus FEMININUM = new Genus("Femininum");
+ public static final Genus OTHER = new Genus("Other");
+
+ private final String name;
+
+ private Genus(final String name) {
+ this.name = name;
+ }
+
+ public String toString() {
+ return name;
+ }
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/Disambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/Disambiguator.java
new file mode 100644
index 0000000..88c5455
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/Disambiguator.java
@@ -0,0 +1,50 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.disambiguation;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+
+/**
+ * Disambiguator interface. Particular implementations are language-dependent.
+ *
+ * <p>
+ * The POS tagger might assign multiple tags to the token.
+ *
+ * The goal is to filter out the incorrect tags and leave ideally only one per
+ * token.
+ *
+ * @author Jozef Licko
+ */
+ public interface Disambiguator {
+
+ /**
+ * If possible, filters out the wrong POS tags.
+ *
+ * @param input
+ * The sentence with already tagged words. The words are expected to
+ * have multiple tags.
+ * @return Analyzed sentence, where each word ideally has only one (possibly
+ * the most correct) tag. Implementations may also add readings, e.g.
+ * a chunker that marks the start and end of multiword units.
+ * @throws IOException
+ * if the implementation fails to read data it needs, e.g. a
+ * resource file that is loaded on first use
+ */
+ AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException;
+
+ }
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunker.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunker.java
new file mode 100644
index 0000000..a190cfb
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishChunker.java
@@ -0,0 +1,199 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.pl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+
+/**
+ * Multiword tagger-chunker for Polish.
+ *
+ * @author Marcin Miłkowski
+ */
+ public class PolishChunker implements Disambiguator {
+
+   private static final String FILENAME = "/pl/multiwords.txt";
+
+   /**
+    * First token of every multiword that contains spaces, mapped to the
+    * largest number of space-separated parts of a multiword starting with it.
+    */
+   private Map<String, Integer> mStartSpace;
+
+   /**
+    * First character of every multiword without spaces, mapped to the largest
+    * number of characters of a multiword starting with it.
+    */
+   private Map<String, Integer> mStartNoSpace;
+
+   /** Complete multiword mapped to its tag. */
+   private Map<String, String> mFull;
+
+   /*
+    * Lazy init, thanks to Artur Trzewik
+    *
+    * The maps are built in local variables and only assigned to the fields
+    * once the file has been read completely, so a failed load is retried on
+    * the next call instead of leaving empty maps behind.
+    */
+   private void lazyInit() throws IOException {
+     if (mStartSpace != null) {
+       return;
+     }
+
+     final Map<String, Integer> startSpace = new HashMap<String, Integer>();
+     final Map<String, Integer> startNoSpace = new HashMap<String, Integer>();
+     final Map<String, String> full = new HashMap<String, String>();
+
+     final List<String> posTokens = loadWords(JLanguageTool.getDataBroker().getFromResourceDirAsStream(FILENAME));
+     for (String posToken : posTokens) {
+       final String[] tokenAndTag = posToken.split("\t");
+       if (tokenAndTag.length < 2) {
+         throw new IOException("Invalid line in " + FILENAME
+             + ", expected multiword and tag separated by a tab: " + posToken);
+       }
+       final String multiword = tokenAndTag[0];
+       if (multiword.indexOf(' ') > 0) {
+         final String[] words = multiword.split(" ");
+         putMax(startSpace, words[0], words.length);
+       } else {
+         // multiwords without spaces are matched token by token, the
+         // lookup key is their first character
+         putMax(startNoSpace, multiword.substring(0, 1), multiword.length());
+       }
+       full.put(multiword, tokenAndTag[1]);
+     }
+
+     mStartNoSpace = startNoSpace;
+     mFull = full;
+     // assigned last: a non-null value marks the initialization as done
+     mStartSpace = startSpace;
+   }
+
+   /**
+    * Stores <code>length</code> for <code>key</code> unless a value that is
+    * at least as large is stored already.
+    */
+   private static void putMax(final Map<String, Integer> map, final String key, final int length) {
+     final Integer known = map.get(key);
+     if (known == null || known.intValue() < length) {
+       map.put(key, Integer.valueOf(length));
+     }
+   }
+
+   /**
+    * Implements multiword POS tags, e.g., &lt;ELLIPSIS&gt; for ellipsis (...)
+    * start, and &lt;/ELLIPSIS&gt; for ellipsis end.
+    *
+    * The readings are added to the token objects of <code>input</code>; the
+    * first token of a multiword gets the start marker, the token where the
+    * match ends gets the end marker. Both use the multiword as lemma.
+    *
+    * @param input
+    *          The tokens to be chunked.
+    * @return AnalyzedSentence with additional markers.
+    * @throws IOException
+    *           if the multiword list cannot be read or contains a line
+    *           without a tab
+    */
+   public final AnalyzedSentence disambiguate(final AnalyzedSentence input) throws IOException {
+
+     lazyInit();
+
+     final AnalyzedTokenReadings[] anTokens = input.getTokens();
+
+     for (int i = 0; i < anTokens.length; i++) {
+       final String tok = anTokens[i].getToken();
+
+       // Multiwords with spaces: join the following non-whitespace tokens
+       // with single spaces, up to the largest known number of parts.
+       final Integer maxParts = mStartSpace.get(tok);
+       if (maxParts != null) {
+         final StringBuilder candidate = new StringBuilder();
+         int partCounter = 0;
+         for (int j = i; j < anTokens.length; j++) {
+           if (anTokens[j].isWhitespace()) {
+             continue;
+           }
+           candidate.append(anTokens[j].getToken());
+           markMultiword(anTokens, i, j, candidate.toString());
+           partCounter++;
+           if (partCounter == maxParts.intValue()) {
+             break;
+           }
+           candidate.append(' ');
+         }
+       }
+
+       // Multiwords without spaces: join the following tokens directly. A
+       // separate builder is used so that text collected above cannot leak
+       // into this candidate. The end is clipped to the sentence length so
+       // that shorter multiwords at the end of a sentence are still found.
+       final Integer maxLength = mStartNoSpace.get(tok);
+       if (maxLength != null) {
+         final StringBuilder candidate = new StringBuilder();
+         final int end = Math.min(i + maxLength.intValue(), anTokens.length);
+         for (int j = i; j < end; j++) {
+           candidate.append(anTokens[j].getToken());
+           markMultiword(anTokens, i, j, candidate.toString());
+         }
+       }
+     }
+
+     return new AnalyzedSentence(anTokens);
+   }
+
+   /**
+    * If <code>candidate</code> is a known multiword, adds a start marker
+    * reading to the token at <code>start</code> and an end marker reading to
+    * the token at <code>end</code>; does nothing otherwise.
+    */
+   private void markMultiword(final AnalyzedTokenReadings[] tokens, final int start,
+       final int end, final String candidate) {
+     final String tag = mFull.get(candidate);
+     if (tag == null) {
+       return;
+     }
+     final String startToken = tokens[start].getToken();
+     final String endToken = tokens[end].getToken();
+     tokens[start].addReading(new AnalyzedToken(startToken, "<" + tag + ">", candidate));
+     tokens[end].addReading(new AnalyzedToken(endToken, "</" + tag + ">", candidate));
+   }
+
+   /**
+    * Reads the stream as UTF-8 and returns its trimmed lines, without empty
+    * lines and without comment lines starting with '#'. The stream is closed
+    * afterwards.
+    */
+   private List<String> loadWords(final InputStream file) throws IOException {
+     final List<String> lines = new ArrayList<String>();
+     final BufferedReader br = new BufferedReader(new InputStreamReader(file, "UTF-8"));
+     try {
+       String line;
+       while ((line = br.readLine()) != null) {
+         line = line.trim();
+         if (line.length() < 1 || line.charAt(0) == '#') { // ignore empty lines and comments
+           continue;
+         }
+         lines.add(line);
+       }
+     } finally {
+       // closing the buffered reader also closes the wrapped readers
+       br.close();
+     }
+     return lines;
+   }
+
+ } \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishHybridDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishHybridDisambiguator.java
new file mode 100644
index 0000000..b06cbdb
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/pl/PolishHybridDisambiguator.java
@@ -0,0 +1,48 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.pl;
+
+import java.io.IOException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.pl.PolishRuleDisambiguator;
+
+/**
+ * Hybrid chunker-disambiguator for Polish.
+ *
+ * @author Marcin Miłkowski
+ */
+
+public class PolishHybridDisambiguator implements Disambiguator {
+
+  /** First stage: the Polish chunker. */
+  private final Disambiguator chunkingStage = new PolishChunker();
+  /** Second stage: the Polish rule-based disambiguator. */
+  private final Disambiguator ruleStage = new PolishRuleDisambiguator();
+
+  /**
+   * Runs the sentence through the chunker and hands the chunker's result
+   * to the rule-based disambiguator.
+   *
+   * @param input sentence to disambiguate
+   * @return the result of the rule-based disambiguator
+   * @throws IOException if one of the two stages fails with an I/O error
+   */
+  public final AnalyzedSentence disambiguate(AnalyzedSentence input)
+      throws IOException {
+    final AnalyzedSentence chunked = chunkingStage.disambiguate(input);
+    return ruleStage.disambiguate(chunked);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/AbstractRuleDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/AbstractRuleDisambiguator.java
new file mode 100644
index 0000000..4605fb7
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/AbstractRuleDisambiguator.java
@@ -0,0 +1,83 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules;
+
+import java.io.IOException;
+import java.util.List;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+import de.danielnaber.languagetool.tools.Tools;
+
+/**
+ * Rule-based disambiguator.
+ * Implements an idea by Agnes Souque.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public abstract class AbstractRuleDisambiguator implements Disambiguator {
+
+  /** File name of the rule file inside the language's resource directory. */
+  protected static final String DISAMB_FILE = "disambiguation.xml";
+  /** Loaded rules; stays null until the first call of disambiguate. */
+  protected List<DisambiguationPatternRule> disambiguationRules;
+
+  /** @return the language whose short name selects the rule file */
+  protected abstract Language getLanguage();
+
+  /**
+   * Applies every loaded disambiguation rule to the sentence, in list order,
+   * each rule working on the output of the previous one. The rules are
+   * loaded on the first call and kept in {@link #disambiguationRules}.
+   *
+   * @param input sentence to disambiguate
+   * @return the sentence after the last rule has been applied
+   * @throws IOException declared by the interface and by the rules
+   * @throws RuntimeException if loading the rule file fails for any reason
+   */
+  @Override
+  public AnalyzedSentence disambiguate(final AnalyzedSentence input) throws IOException {
+    if (disambiguationRules == null) {
+      final String rulesPath = JLanguageTool.getDataBroker().getResourceDir()
+          + "/" + getLanguage().getShortName() + "/" + DISAMB_FILE;
+      try {
+        disambiguationRules = loadPatternRules(rulesPath);
+      } catch (final Exception e) {
+        throw new RuntimeException("Problems with parsing disambiguation file: "
+            + rulesPath, e);
+      }
+    }
+    AnalyzedSentence current = input;
+    for (final DisambiguationPatternRule rule : disambiguationRules) {
+      current = rule.replace(current);
+    }
+    return current;
+  }
+
+  /**
+   * Loads disambiguation rules from an XML file.
+   *
+   * @param filename name handed to {@code Tools.getStream} to open the file
+   * @throws ParserConfigurationException
+   * @throws SAXException
+   * @throws IOException
+   * @return a List of {@link DisambiguationPatternRule} objects
+   */
+  protected List<DisambiguationPatternRule> loadPatternRules(final String filename)
+      throws ParserConfigurationException, SAXException, IOException {
+    return new DisambiguationRuleLoader().getRules(Tools.getStream(filename));
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambXMLRuleHandler.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambXMLRuleHandler.java
new file mode 100644
index 0000000..5154009
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambXMLRuleHandler.java
@@ -0,0 +1,52 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.disambiguation.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import de.danielnaber.languagetool.rules.patterns.XMLRuleHandler;
+
+/**
+ * XML rule handler that loads disambiguation rules from XML and throws
+ * exceptions on errors and warnings.
+ *
+ * @author Daniel Naber
+ */
+class DisambXMLRuleHandler extends XMLRuleHandler {
+
+  /** Flag for being inside a disambiguation element; not written in this class. */
+  boolean inDisambiguation;
+
+  /** Rules collected so far; handed out as-is by getDisambRules(). */
+  final List<DisambiguationPatternRule> rules = new ArrayList<DisambiguationPatternRule>();
+
+  /** Parser errors are not tolerated: the exception is rethrown unchanged. */
+  public void error (final SAXParseException e) throws SAXException {
+    throw e;
+  }
+
+  /** Parser warnings are treated like errors: the exception is rethrown unchanged. */
+  public void warning (final SAXParseException e) throws SAXException {
+    throw e;
+  }
+
+  /** @return the live list of collected rules (no copy is made) */
+  List<DisambiguationPatternRule> getDisambRules() {
+    return rules;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguatedExample.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguatedExample.java
new file mode 100644
index 0000000..127e0a4
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguatedExample.java
@@ -0,0 +1,77 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules;
+
+/**
+ * Disambiguated example. Used for testing
+ * disambiguator rules.
+ * @author Marcin Milkowski
+ * @since 0.9.8
+ */
+public class DisambiguatedExample {
+
+  private final String example;
+  private final String inputForms;
+  private final String outputForms;
+
+  /**
+   * Creates an example without ambiguous and disambiguated forms;
+   * {@link #getAmbiguous()} and {@link #getDisambiguated()} return null.
+   *
+   * @param example
+   *          Example sentence
+   */
+  public DisambiguatedExample(final String example) {
+    this(example, null, null);
+  }
+
+  /**
+   * @param example
+   *          Example sentence
+   * @param input
+   *          Ambiguous forms of a token
+   *          (specify in 'word[lemma/POS]' format)
+   * @param output
+   *          Disambiguated forms of a token
+   *          (specify in 'word[lemma/POS]' format)
+   */
+  public DisambiguatedExample(final String example, final String input, final String output) {
+    this.example = example;
+    this.inputForms = input;
+    this.outputForms = output;
+  }
+
+  /**
+   * Return the example sentence.
+   */
+  public String getExample() {
+    return example;
+  }
+
+  /**
+   * Return the disambiguated (output) forms. May be null.
+   */
+  public String getDisambiguated() {
+    return outputForms;
+  }
+
+  /**
+   * Return the ambiguous (input) forms. May be null.
+   */
+  public String getAmbiguous() {
+    return inputForms;
+  }
+
+  /**
+   * Return the example in the form "example: input -> output".
+   */
+  @Override
+  public String toString() {
+    return example + ": " + inputForms + " -> " + outputForms;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
new file mode 100644
index 0000000..6b5d3a8
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
@@ -0,0 +1,357 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.disambiguation.rules;
+
+import java.io.IOException;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.AbstractPatternRule;
+import de.danielnaber.languagetool.rules.patterns.Element;
+import de.danielnaber.languagetool.rules.patterns.Match;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * A Rule that describes a pattern of words or part-of-speech tags used for
+ * disambiguation.
+ *
+ * @author Marcin Miłkowski
+ */
+public class DisambiguationPatternRule extends AbstractPatternRule {
+
+ /** Possible disambiguator actions. **/
+ public enum DisambiguatorAction {
+ ADD, FILTER, REMOVE, REPLACE, UNIFY;
+
+ /**
+ * Converts a string to the matching enum constant.
+ *
+ * @param str
+ * Name of the constant. It is passed to valueOf unchanged, so it
+ * has to match the constant's name exactly.
+ * @return the matching DisambiguatorAction, or REPLACE whenever valueOf
+ * throws (for example for an unknown name or for null).
+ */
+ public static DisambiguatorAction toAction(final String str) {
+ try {
+ return valueOf(str);
+ } catch (final Exception ex) {
+ // any failure silently falls back to the default action
+ return REPLACE;
+ }
+ }
+ }
+
+ // POS tag given to the constructor as 'disamb'; read by FILTER and REPLACE
+ // when no match element is set.
+ private final String disambiguatedPOS;
+
+ // Match given to the constructor as 'posSelect'; may be null.
+ private final Match matchElement;
+
+ // Action executed by executeAction() for every match of the pattern.
+ private final DisambiguatorAction disAction;
+
+ // Readings set through setNewInterpretations(); read by ADD and REMOVE.
+ private AnalyzedToken[] newTokenReadings;
+
+ private List<DisambiguatedExample> examples;
+
+ private List<String> untouchedExamples;
+
+ /**
+ * @param id
+ * Id of the Rule
+ * @param description
+ * Description to be shown (name)
+ * @param language
+ * Language of the Rule
+ * @param elements
+ * Element (token) list
+ * @param disamb
+ * POS tag stored as the disambiguated POS. May be null only if
+ * posSelect is not null or the action is UNIFY, ADD or REMOVE.
+ * @param posSelect
+ * Match stored as the match element; may be null.
+ * @param disambAction
+ * - the action to be executed on found token(s), one of the
+ * following: add, filter, remove, replace, unify.
+ * @throws NullPointerException
+ * if id, language, elements or description is null, or if the
+ * disambiguated POS is missing as described for disamb.
+ */
+
+ DisambiguationPatternRule(final String id, final String description,
+ final Language language, final List<Element> elements,
+ final String disamb, final Match posSelect,
+ final DisambiguatorAction disambAction) {
+ // note: the null checks below run only after the superclass constructor
+ // has already received the arguments
+ super(id, description, language, elements, true);
+ if (id == null) {
+ throw new NullPointerException("id cannot be null");
+ }
+ if (language == null) {
+ throw new NullPointerException("language cannot be null");
+ }
+ if (elements == null) {
+ throw new NullPointerException("elements cannot be null");
+ }
+ if (description == null) {
+ throw new NullPointerException("description cannot be null");
+ }
+ // UNIFY, ADD and REMOVE need neither a POS tag nor a match element
+ if (disamb == null && posSelect == null
+ && disambAction != DisambiguatorAction.UNIFY
+ && disambAction != DisambiguatorAction.ADD
+ && disambAction != DisambiguatorAction.REMOVE) {
+ throw new NullPointerException("disambiguated POS cannot be null");
+ }
+ this.disambiguatedPOS = disamb;
+ this.matchElement = posSelect;
+ this.disAction = disambAction;
+ this.unifier = language.getDisambiguationUnifier();
+ }
+
+ /**
+ * Used to add new interpretations. The given array is cloned, so later
+ * changes to the caller's array do not reach this rule.
+ *
+ * @param newReadings
+ * An array of AnalyzedTokens. ADD and REMOVE only act when its
+ * length equals the number of matched tokens after applying the
+ * start and end position corrections (the tokens selected by the
+ * mark / mark_from and mark_to attributes).
+ */
+ public final void setNewInterpretations(final AnalyzedToken[] newReadings) {
+ newTokenReadings = newReadings.clone();
+ }
+
+ /**
+ * Performs disambiguation on the source sentence: the pattern is tried at
+ * every possible start token, and the rule's action is executed for every
+ * start position at which all pattern elements match.
+ *
+ * @param text
+ * {@link AnalyzedSentence} Sentence to be disambiguated.
+ * @return {@link AnalyzedSentence} Disambiguated sentence; the very same
+ * object as text when the pattern did not match anywhere.
+ * @throws IOException
+ */
+ public final AnalyzedSentence replace(final AnalyzedSentence text)
+ throws IOException {
+ // matching works on the tokens without whitespace; the action is applied
+ // to the array that still contains the whitespace tokens
+ final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+ AnalyzedTokenReadings[] whTokens = text.getTokens();
+ // per matched pattern element: number of tokens consumed, including
+ // tokens skipped in front of the match (filled below as skipShift + 1)
+ final int[] tokenPositions = new int[tokens.length + 1];
+ final int patternSize = patternElements.size();
+ // number of start positions at which the whole pattern still fits
+ final int limit = Math.max(0, tokens.length - patternSize + 1);
+ // NOTE(review): elem is declared outside the loops, so for k == 0 and
+ // i > 0 prevElement is the last element tested for the previous start
+ // position -- confirm this is intended.
+ Element elem = null;
+ boolean changed = false;
+ // when the inherited sentStart flag is set, only start position 0 is tried
+ for (int i = 0; i < limit && !(sentStart && i > 0); i++) {
+ boolean allElementsMatch = false;
+ unifiedTokens = null;
+ int matchingTokens = 0;
+ int skipShiftTotal = 0;
+ int firstMatchToken = -1;
+ int prevSkipNext = 0;
+ if (testUnification) {
+ unifier.reset();
+ }
+ for (int k = 0; k < patternSize; k++) {
+ final Element prevElement = elem;
+ elem = patternElements.get(k);
+ setupRef(firstMatchToken, elem, tokens);
+ // first candidate token for element k, shifted by all skips so far
+ final int nextPos = i + k + skipShiftTotal;
+ prevMatched = false;
+ // a negative skip, or one that would run past the last token, is
+ // cut down to the number of tokens left after nextPos
+ if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { // SENT_END?
+ prevSkipNext = tokens.length - (nextPos + 1);
+ }
+ // last candidate: bounded by the allowed skip and by the room that
+ // the remaining pattern elements still need
+ final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length - (patternSize - k));
+ for (int m = nextPos; m <= maxTok; m++) {
+ allElementsMatch = testAllReadings(tokens, elem, prevElement, m,
+ firstMatchToken, prevSkipNext);
+ if (allElementsMatch) {
+ final int skipShift = m - nextPos;
+ tokenPositions[matchingTokens] = skipShift + 1;
+ prevSkipNext = elem.getSkipNext();
+ matchingTokens++;
+ skipShiftTotal += skipShift;
+ if (firstMatchToken == -1) {
+ firstMatchToken = m;
+ }
+ break;
+ }
+ }
+ // element k matched nowhere in its window: give up this start position
+ if (!allElementsMatch) {
+ break;
+ }
+ }
+ if (allElementsMatch && matchingTokens == patternSize) {
+ whTokens = executeAction(text, whTokens, unifiedTokens,
+ firstMatchToken, matchingTokens, tokenPositions);
+ changed = true;
+ }
+ }
+ if (changed) {
+ return new AnalyzedSentence(whTokens, text.getWhPositions());
+ }
+ return text;
+ }
+
+ /**
+ * Executes this rule's action for one match of the pattern.
+ *
+ * @param text
+ * the sentence; used to map positions in the whitespace-free token
+ * array to positions in whiteTokens via getOriginalPosition
+ * @param whiteTokens
+ * tokens including whitespace. The array itself is cloned, but the
+ * clone is shallow: ADD and REMOVE change the readings of token
+ * objects that are shared with this array.
+ * @param unifiedTokens
+ * replacement tokens for UNIFY; may be null
+ * @param firstMatchToken
+ * index of the first matched token (whitespace-free numbering)
+ * @param matchingTokens
+ * number of matched pattern elements
+ * @param tokenPositions
+ * tokens consumed per matched element, as filled in by replace()
+ * @return the cloned token array with the action applied
+ */
+ private AnalyzedTokenReadings[] executeAction(final AnalyzedSentence text,
+ final AnalyzedTokenReadings[] whiteTokens,
+ final AnalyzedTokenReadings[] unifiedTokens, final int firstMatchToken,
+ final int matchingTokens, final int[] tokenPositions) {
+ final AnalyzedTokenReadings[] whTokens = whiteTokens.clone();
+ // offset of the first marked token relative to firstMatchToken: the
+ // consumed-token counts of elements 0..startPositionCorrection, minus one
+ int correctedStPos = 0;
+ if (startPositionCorrection > 0) {
+ for (int l = 0; l <= startPositionCorrection; l++) {
+ correctedStPos += tokenPositions[l];
+ }
+ correctedStPos--;
+ }
+ // NOTE(review): correctedEndPos is computed here but never read again in
+ // this method.
+ int correctedEndPos = 0;
+ if (endPositionCorrection < 0) {
+ int l = 0;
+ while (l > endPositionCorrection) {
+ correctedEndPos -= tokenPositions[matchingTokens + l - 1];
+ l--;
+ }
+ }
+ final int fromPos = text.getOriginalPosition(firstMatchToken
+ + correctedStPos);
+ final int numRead = whTokens[fromPos].getReadingsLength();
+ final boolean spaceBefore = whTokens[fromPos].isWhitespaceBefore();
+ boolean filtered = false;
+ switch (disAction) {
+ case UNIFY:
+ // replace the marked tokens by the unified ones, but only when their
+ // number equals the number of marked tokens
+ if (unifiedTokens != null) {
+ if (unifiedTokens.length == matchingTokens - startPositionCorrection
+ + endPositionCorrection) {
+ // carry the sentence-end flag over to the last unified token
+ if (whTokens[text.getOriginalPosition(firstMatchToken + correctedStPos
+ + unifiedTokens.length - 1)].isSentEnd()) {
+ unifiedTokens[unifiedTokens.length - 1].setSentEnd();
+ }
+ for (int i = 0; i < unifiedTokens.length; i++) {
+ unifiedTokens[i].setStartPos(whTokens[text.getOriginalPosition(firstMatchToken + correctedStPos
+ + i)].getStartPos());
+ whTokens[text.getOriginalPosition(firstMatchToken + correctedStPos
+ + i)] = unifiedTokens[i];
+ }
+ }
+ }
+ break;
+ case REMOVE:
+ // remove the i-th configured reading from the i-th marked token
+ if (newTokenReadings != null) {
+ if (newTokenReadings.length == matchingTokens - startPositionCorrection
+ + endPositionCorrection) {
+ for (int i = 0; i < newTokenReadings.length; i++) {
+ whTokens[text.getOriginalPosition(firstMatchToken + correctedStPos
+ + i)].removeReading(newTokenReadings[i]);
+ }
+ }
+ }
+ break;
+ case ADD:
+ // add the i-th configured reading to the i-th marked token; an empty
+ // token text is taken from the sentence, a null lemma becomes the token
+ if (newTokenReadings != null) {
+ if (newTokenReadings.length == matchingTokens - startPositionCorrection
+ + endPositionCorrection) {
+ String lemma = "";
+ String token = "";
+ for (int i = 0; i < newTokenReadings.length; i++) {
+ if ("".equals(newTokenReadings[i].getToken())) { //empty token
+ token = whTokens[text.getOriginalPosition(firstMatchToken + correctedStPos
+ + i)].getToken();
+ } else {
+ token = newTokenReadings[i].getToken();
+ }
+ if (newTokenReadings[i].getLemma() == null) { //empty lemma
+ lemma = token;
+ } else {
+ lemma = newTokenReadings[i].getLemma();
+ }
+ final AnalyzedToken newTok = new AnalyzedToken(token, newTokenReadings[i].getPOSTag(), lemma);
+ whTokens[text.getOriginalPosition(firstMatchToken + correctedStPos
+ + i)].addReading(newTok);
+ }
+ }
+ }
+ break;
+ case FILTER:
+ // NOTE(review): unlike both REPLACE branches, this branch does not call
+ // setWhitespaceBefore(spaceBefore) on the new token -- confirm.
+ if (matchElement == null) { // same as REPLACE if using <match>
+ final Match tmpMatchToken = new Match(disambiguatedPOS, null, true,
+ disambiguatedPOS, null, Match.CaseConversion.NONE,
+ false, Match.IncludeRange.NONE);
+ tmpMatchToken.setToken(whTokens[fromPos]);
+ whTokens[fromPos] = tmpMatchToken.filterReadings();
+ filtered = true;
+ }
+ // no break: FILTER falls through to REPLACE; the 'filtered' flag keeps
+ // the REPLACE code from running once the filter has been applied
+ case REPLACE:
+ default:
+ if (!filtered) {
+ if (matchElement == null) {
+ // take the lemma of the last reading that has the disambiguated POS
+ // tag and a non-null lemma
+ String lemma = "";
+ for (int l = 0; l < numRead; l++) {
+ if (whTokens[fromPos].getAnalyzedToken(l).getPOSTag() != null
+ && (whTokens[fromPos].getAnalyzedToken(l).getPOSTag().equals(
+ disambiguatedPOS) && (whTokens[fromPos].getAnalyzedToken(l)
+ .getLemma() != null))) {
+ lemma = whTokens[fromPos].getAnalyzedToken(l).getLemma();
+ }
+ }
+ // otherwise fall back to the lemma of the first reading
+ if (StringTools.isEmpty(lemma)) {
+ lemma = whTokens[fromPos].getAnalyzedToken(0).getLemma();
+ }
+
+ // replace all readings by a single one and restore the flags of the
+ // token that is being replaced
+ final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
+ new AnalyzedToken(whTokens[fromPos].getToken(), disambiguatedPOS,
+ lemma), whTokens[fromPos].getStartPos());
+ final boolean isSentEnd = whTokens[fromPos].isSentEnd();
+ final boolean isParaEnd = whTokens[fromPos].isParaEnd();
+ whTokens[fromPos] = toReplace;
+ if (isSentEnd) {
+ whTokens[fromPos].setSentEnd();
+ }
+ if (isParaEnd) {
+ whTokens[fromPos].setParaEnd();
+ }
+ whTokens[fromPos].setWhitespaceBefore(spaceBefore);
+ } else {
+ // using the match element
+ matchElement.setToken(whTokens[fromPos]);
+ whTokens[fromPos] = matchElement.filterReadings();
+ whTokens[fromPos].setWhitespaceBefore(spaceBefore);
+ }
+ }
+ }
+ return whTokens;
+ }
+
+ /**
+ * @param examples
+ * the examples to set
+ */
+ public void setExamples(final List<DisambiguatedExample> examples) {
+ this.examples = examples;
+ }
+
+ /**
+ * @return the examples
+ */
+ public List<DisambiguatedExample> getExamples() {
+ return examples;
+ }
+
+ /**
+ * @param untouchedExamples
+ * the untouchedExamples to set
+ */
+ public void setUntouchedExamples(final List<String> untouchedExamples) {
+ this.untouchedExamples = untouchedExamples;
+ }
+
+ /**
+ * @return the untouchedExamples
+ */
+ public List<String> getUntouchedExamples() {
+ return untouchedExamples;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
new file mode 100644
index 0000000..5e38956
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
@@ -0,0 +1,453 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.disambiguation.rules;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.patterns.Element;
+import de.danielnaber.languagetool.rules.patterns.Match;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule.DisambiguatorAction;
+
+/**
+ * Loads {@link DisambiguationPatternRule}s from a disambiguation rules XML
+ * file.
+ *
+ * @author Marcin Miłkowski
+ */
+public class DisambiguationRuleLoader extends DefaultHandler {
+
+  public DisambiguationRuleLoader() {
+  }
+
+  /**
+   * Parses the given XML stream with a freshly created SAX parser and a
+   * fresh {@link DisambiguationRuleHandler}.
+   *
+   * @param file stream with the disambiguation rules XML
+   * @return the rules collected by the handler
+   * @throws ParserConfigurationException if no SAX parser can be created
+   * @throws SAXException if parsing fails
+   * @throws IOException if reading the stream fails
+   */
+  public final List<DisambiguationPatternRule> getRules(final InputStream file)
+      throws ParserConfigurationException, SAXException, IOException {
+    final DisambiguationRuleHandler ruleHandler = new DisambiguationRuleHandler();
+    SAXParserFactory.newInstance().newSAXParser().parse(file, ruleHandler);
+    return ruleHandler.getDisambRules();
+  }
+
+}
+
+class DisambiguationRuleHandler extends DisambXMLRuleHandler {
+
+ private static final String MARK = "mark";
+ private static final String WD = "wd";
+ private static final String ACTION = "action";
+ private static final String DISAMBIG = "disambig";
+
+ private String name;
+ private String ruleGroupId;
+ private String ruleGroupName;
+ private StringBuilder disamb = new StringBuilder();
+ private StringBuilder wd = new StringBuilder();
+ private StringBuilder example = new StringBuilder();
+
+ private boolean inWord;
+
+ private String disambiguatedPOS;
+
+ private int positionCorrection;
+ private int endPositionCorrection;
+ private boolean singleTokenCorrection;
+
+ private Match posSelector;
+
+ private int uniCounter;
+
+ private List<AnalyzedToken> newWdList;
+ private String wdLemma;
+ private String wdPos;
+
+ private boolean inExample;
+ private boolean untouched;
+ private List<String> untouchedExamples;
+ private List<DisambiguatedExample> disambExamples;
+ private String input;
+ private String output;
+
+ private DisambiguationPatternRule.DisambiguatorAction disambigAction;
+
+
+ // ===========================================================
+ // SAX DocumentHandler methods
+ // ===========================================================
+
+ /**
+ * SAX callback for an opening tag. Dispatches on the qualified element name
+ * and records the element's attributes in the handler's fields; the visible
+ * part of endElement() builds the rule from them when a rule element closes.
+ *
+ * @param namespaceURI not used here
+ * @param lName not used here
+ * @param qName qualified element name the dispatch is based on
+ * @param attrs attributes of the element
+ * @throws SAXException for conflicting mark attributes, a positive mark_to
+ * value, or a match element that refers to a token beyond the
+ * elements read so far
+ */
+ @Override
+ public void startElement(final String namespaceURI, final String lName,
+ final String qName, final Attributes attrs) throws SAXException {
+ if (qName.equals("rule")) {
+ id = attrs.getValue("id");
+ name = attrs.getValue("name");
+ // inside a rule group, a rule without id or name takes the group's value
+ if (inRuleGroup && id == null) {
+ id = ruleGroupId;
+ }
+ if (inRuleGroup && name == null) {
+ name = ruleGroupName;
+ }
+ } else if ("rules".equals(qName)) {
+ language = Language.getLanguageForShortName(attrs.getValue("lang"));
+ } else if (qName.equals(PATTERN)) {
+ inPattern = true;
+ // 'mark' must not be combined with 'mark_from' or 'mark_to'
+ if (attrs.getValue(MARK) != null && (attrs.getValue(MARK_FROM) != null)) {
+ throw new SAXException(
+ "You cannot use both mark and mark_from attributes." + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ if (attrs.getValue(MARK) != null && (attrs.getValue(MARK_TO) != null)) {
+ throw new SAXException(
+ "You cannot use both mark and mark_to attributes." + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+
+ // both 'mark' and 'mark_from' set the start position correction
+ if (attrs.getValue(MARK) != null) {
+ positionCorrection = Integer.parseInt(attrs.getValue(MARK));
+ }
+ if (attrs.getValue(MARK_FROM) != null) {
+ positionCorrection = Integer.parseInt(attrs.getValue(MARK_FROM));
+ }
+ // without 'mark_to' the rule marks a single token
+ if (attrs.getValue(MARK_TO) == null) {
+ singleTokenCorrection = true;
+ } else {
+ endPositionCorrection = Integer.parseInt(attrs.getValue(MARK_TO));
+ if (endPositionCorrection > 0) {
+ throw new SAXException("End position correction (mark_to="
+ + endPositionCorrection
+ + ") cannot be larger than 0: " + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ singleTokenCorrection = false;
+ }
+ if (attrs.getValue(CASE_SENSITIVE) != null
+ && YES.equals(attrs.getValue(CASE_SENSITIVE))) {
+ caseSensitive = true;
+ }
+ } else if (qName.equals(EXCEPTION)) {
+ setExceptions(attrs);
+ } else if (qName.equals(AND)) {
+ inAndGroup = true;
+ } else if (qName.equals(UNIFY)) {
+ inUnification = true;
+ uniNegation = YES.equals(attrs.getValue(NEGATE));
+ uniCounter = 0;
+ } else if ("feature".equals(qName)) {
+ uFeature = attrs.getValue("id");
+ } else if (qName.equals(TYPE)) {
+ uType = attrs.getValue("id");
+ uTypeList.add(uType);
+ } else if (qName.equals(TOKEN)) {
+ setToken(attrs);
+ } else if (qName.equals(DISAMBIG)) {
+ inDisambiguation = true;
+ disambiguatedPOS = attrs.getValue(POSTAG);
+ if (attrs.getValue(ACTION) == null) {
+ // default mode:
+ disambigAction = DisambiguationPatternRule.DisambiguatorAction
+ .toAction("REPLACE");
+ } else {
+ // the attribute value is upper-cased before it is mapped to the enum
+ disambigAction = DisambiguationPatternRule.DisambiguatorAction
+ .toAction(attrs.getValue(ACTION).toUpperCase());
+ }
+ disamb = new StringBuilder();
+ } else if (qName.equals(MATCH)) {
+ inMatch = true;
+ match = new StringBuilder();
+ Match.CaseConversion caseConversion = Match.CaseConversion.NONE;
+ if (attrs.getValue("case_conversion") != null) {
+ caseConversion = Match.CaseConversion.toCase(attrs
+ .getValue("case_conversion").toUpperCase());
+ }
+ Match.IncludeRange includeRange = Match.IncludeRange.NONE;
+ if (attrs.getValue("include_skipped") != null) {
+ includeRange = Match.IncludeRange.toRange(attrs
+ .getValue("include_skipped").toUpperCase());
+ }
+ final Match mWorker = new Match(attrs.getValue(POSTAG), attrs
+ .getValue("postag_replace"), YES
+ .equals(attrs.getValue(POSTAG_REGEXP)), attrs
+ .getValue("regexp_match"), attrs.getValue("regexp_replace"),
+ caseConversion, YES.equals(attrs.getValue("setpos")),
+ includeRange);
+ // the Match is only kept when the token-number attribute (NO) is
+ // present: inside a disambiguation element it becomes the POS selector,
+ // inside a token it becomes the token reference
+ if (inDisambiguation) {
+ if (attrs.getValue(NO) != null) {
+ final int refNumber = Integer.parseInt(attrs.getValue(NO));
+ if (refNumber > elementList.size()) {
+ throw new SAXException(
+ "Only backward references in match elements are possible, tried to specify token "
+ + refNumber
+ + "\n Line: "
+ + pLocator.getLineNumber()
+ + ", column: " + pLocator.getColumnNumber() + ".");
+ }
+ mWorker.setTokenRef(refNumber);
+ posSelector = mWorker;
+ }
+ } else if (inToken) {
+ if (attrs.getValue(NO) != null) {
+ final int refNumber = Integer.parseInt(attrs.getValue(NO));
+ if (refNumber > elementList.size()) {
+ throw new SAXException(
+ "Only backward references in match elements are possible, tried to specify token "
+ + refNumber
+ + "\n Line: "
+ + pLocator.getLineNumber()
+ + ", column: " + pLocator.getColumnNumber() + ".");
+ }
+ mWorker.setTokenRef(refNumber);
+ tokenReference = mWorker;
+ // the reference is appended to the token text as a backslash
+ // followed by the token number
+ elements.append('\\');
+ elements.append(refNumber);
+ }
+ }
+ } else if (qName.equals(RULEGROUP)) {
+ ruleGroupId = attrs.getValue("id");
+ ruleGroupName = attrs.getValue("name");
+ inRuleGroup = true;
+ } else if (qName.equals(UNIFICATION)) {
+ uFeature = attrs.getValue(FEATURE);
+ inUnificationDef = true;
+ } else if ("equivalence".equals(qName)) {
+ uType = attrs.getValue(TYPE);
+ } else if (qName.equals(WD)) {
+ wdLemma = attrs.getValue("lemma");
+ wdPos = attrs.getValue("pos");
+ inWord = true;
+ wd = new StringBuilder();
+ } else if (qName.equals(EXAMPLE)) {
+ inExample = true;
+ if (untouchedExamples == null) {
+ untouchedExamples = new ArrayList<String>();
+ }
+ if (disambExamples == null) {
+ disambExamples = new ArrayList<DisambiguatedExample>();
+ }
+ // NOTE(review): getValue returns null for a missing attribute, so an
+ // example element without the TYPE attribute ends in a
+ // NullPointerException here -- confirm the attribute is mandatory.
+ untouched = attrs.getValue(TYPE).equals("untouched");
+ if (attrs.getValue(TYPE).equals("ambiguous")) {
+ input = attrs.getValue("inputform");
+ output = attrs.getValue("outputform");
+ }
+ example = new StringBuilder();
+ } else if ("marker".equals(qName)) {
+ // keep the marker tag as literal text inside the example
+ example.append("<marker>");
+ }
+ }
+
+ /**
+ * Handles a closing XML tag of the disambiguation rule file.
+ *
+ * Depending on the element name this finalizes the object that was being
+ * built from the state collected by startElement() and characters(): a
+ * complete rule, a single token element, a pattern, a match, a wd
+ * interpretation or an example. Most branches also reset the flags and
+ * buffers that belong to the element being closed.
+ *
+ * @param namespaceURI not used by this method
+ * @param sName not used by this method
+ * @param qName qualified name of the element being closed; selects the branch
+ * @throws SAXException if the rule or pattern just closed is inconsistent
+ * (wrong number of wd interpretations or unified tokens, more than one
+ * token filtered/replaced, or a marked token outside the pattern)
+ */
+ @Override
+ public void endElement(final String namespaceURI, final String sName,
+ final String qName) throws SAXException {
+ if ("rule".equals(qName)) {
+ // Build the rule from everything collected since the opening tag.
+ final DisambiguationPatternRule rule = new DisambiguationPatternRule(id,
+ name, language, elementList, disambiguatedPOS, posSelector,
+ disambigAction);
+ rule.setStartPositionCorrection(positionCorrection);
+ if (singleTokenCorrection) {
+ // Derive the end correction from the start correction so that
+ // size - positionCorrection + endPositionCorrection equals 1,
+ // i.e. exactly one token remains marked.
+ endPositionCorrection = 1 - (elementList.size() - positionCorrection);
+ rule.setEndPositionCorrection(endPositionCorrection);
+ } else {
+ rule.setEndPositionCorrection(endPositionCorrection);
+ }
+ if (newWdList != null) {
+ // ADD and REMOVE need exactly one wd interpretation per matched token.
+ if (disambigAction == DisambiguatorAction.ADD
+ || disambigAction == DisambiguatorAction.REMOVE) {
+ if (newWdList.size() != (elementList.size() - positionCorrection + endPositionCorrection)) {
+ throw new SAXException(
+ language.getName() + " rule error. The number of interpretations specified with wd: "
+ + newWdList.size()
+ + " must be equal to the number of matched tokens (" + (elementList.size() - positionCorrection + endPositionCorrection) + ")"
+ + "\n Line: " + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ rule.setNewInterpretations(newWdList
+ .toArray(new AnalyzedToken[newWdList.size()]));
+ }
+ // The interpretations were copied into an array above (if needed), so
+ // the shared list can be emptied for the next rule.
+ newWdList.clear();
+ }
+ caseSensitive = false;
+ if (disambExamples != null) {
+ rule.setExamples(disambExamples);
+ }
+ if (untouchedExamples != null) {
+ rule.setUntouchedExamples(untouchedExamples);
+ }
+ // NOTE(review): the rule is added before the two consistency checks
+ // below, so a failing check throws after the rule is already in the list.
+ rules.add(rule);
+ // A UNIFY rule must unify exactly as many tokens as it matches.
+ if (disambigAction == DisambiguatorAction.UNIFY
+ && (elementList.size() - positionCorrection + endPositionCorrection) != uniCounter) {
+ throw new SAXException(language.getName() + " rule error. The number unified tokens: "
+ + uniCounter + " must be equal to the number of matched tokens."
+ + "\n Line: " + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ // FILTER and REPLACE may only act on a single matched token.
+ if ((!singleTokenCorrection && (disambigAction == DisambiguatorAction.FILTER || disambigAction == DisambiguatorAction.REPLACE))
+ && ((elementList.size() - positionCorrection + endPositionCorrection) > 1)) {
+ throw new SAXException(
+ language.getName() + " rule error. Cannot replace or filter more than one token at a time."
+ + "\n Line: " + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ // Reset per-rule state for the next rule.
+ elementList.clear();
+ posSelector = null;
+ disambExamples = null;
+ untouchedExamples = null;
+ } else if (qName.equals(EXCEPTION)) {
+ finalizeExceptions();
+ } else if (qName.equals(AND)) {
+ // The whole AND group counts as a single pattern token.
+ inAndGroup = false;
+ andGroupCounter = 0;
+ tokenCounter++;
+ } else if (qName.equals(TOKEN)) {
+ // If an exception was set and an element already exists, only its string
+ // is updated; otherwise a fresh element is created from the token text.
+ if (!exceptionSet || tokenElement == null) {
+ tokenElement = new Element(elements.toString(), caseSensitive,
+ regExpression, tokenInflected);
+ tokenElement.setNegation(tokenNegated);
+ } else {
+ tokenElement.setStringElement(elements.toString());
+ }
+ if (skipPos != 0) {
+ tokenElement.setSkipNext(skipPos);
+ skipPos = 0;
+ }
+ if (posToken != null) {
+ tokenElement.setPosElement(posToken, posRegExp, posNegation);
+ posToken = null;
+ }
+
+ if (tokenReference != null) {
+ tokenElement.setMatch(tokenReference);
+ }
+
+ // Inside an AND group every token after the first is attached to the
+ // last list element instead of being appended as a new element.
+ if (inAndGroup && andGroupCounter > 0) {
+ elementList.get(elementList.size() - 1)
+ .setAndGroupElement(tokenElement);
+ } else {
+ elementList.add(tokenElement);
+ }
+ if (inAndGroup) {
+ andGroupCounter++;
+ }
+ if (inUnification) {
+ tokenElement.setUnification(equivalenceFeatures);
+ if (uniNegation) {
+ tokenElement.setUniNegation();
+ }
+ uniCounter++;
+ }
+ // Within a unification definition the token is registered as an
+ // equivalence with the language's unifier and the element list is
+ // emptied again.
+ if (inUnificationDef) {
+ language.getDisambiguationUnifier().setEquivalence(uFeature, uType, tokenElement);
+ elementList.clear();
+ }
+ if (tokenSpaceBeforeSet) {
+ tokenElement.setWhitespaceBefore(tokenSpaceBefore);
+ }
+ resetToken();
+ } else if (qName.equals(PATTERN)) {
+ inPattern = false;
+ // The start correction must point at a token inside the pattern.
+ if (positionCorrection >= tokenCounter) {
+ throw new SAXException(
+ "Attempt to mark a token no. ("+ positionCorrection +") that is outside the pattern (" + tokenCounter + "). Pattern elements are numbered starting from 0!" + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ // Likewise the end correction must not exceed the number of tokens.
+ if (tokenCounter - endPositionCorrection < 0 ) {
+ throw new SAXException(
+ "Attempt to mark a token no. ("+ endPositionCorrection +") that is outside the pattern (" + tokenCounter + "). Pattern elements are numbered starting from 0!" + "\n Line: "
+ + pLocator.getLineNumber() + ", column: "
+ + pLocator.getColumnNumber() + ".");
+ }
+ tokenCounter = 0;
+ } else if (qName.equals(MATCH)) {
+ // The collected match text becomes the lemma of either the disambiguation
+ // selector or the token reference, depending on where the match occurred.
+ if (inDisambiguation) {
+ posSelector.setLemmaString(match.toString());
+ } else if (inToken) {
+ tokenReference.setLemmaString(match.toString());
+ }
+ inMatch = false;
+ } else if (qName.equals(DISAMBIG)) {
+ inDisambiguation = false;
+ } else if (qName.equals(RULEGROUP)) {
+ inRuleGroup = false;
+ } else if (qName.equals(UNIFICATION) && inUnificationDef) {
+ inUnificationDef = false;
+ } else if ("feature".equals(qName)) {
+ // Store the types collected for this feature and start a new type list.
+ equivalenceFeatures.put(uFeature, uTypeList);
+ uTypeList = new ArrayList<String>();
+ } else if (qName.equals(UNIFY)) {
+ inUnification = false;
+ equivalenceFeatures = new HashMap<String, List<String>>();
+ } else if (qName.equals(WD)) {
+ addNewWord(wd.toString(), wdLemma, wdPos);
+ inWord = false;
+ } else if (EXAMPLE.equals(qName)) {
+ inExample = false;
+ // Examples go to the untouched list or, with their input/output forms,
+ // to the disambiguated list.
+ if (untouched) {
+ untouchedExamples.add(example.toString());
+ } else {
+ disambExamples.add(new DisambiguatedExample(example.toString(), input, output));
+ }
+ } else if ("marker".equals(qName)) {
+ example.append("</marker>");
+ }
+ }
+
+  /**
+   * Appends one interpretation to the list of new word interpretations,
+   * creating the list on first use.
+   *
+   * @param word token text of the interpretation
+   * @param lemma lemma of the interpretation
+   * @param pos part-of-speech tag of the interpretation
+   */
+  private void addNewWord(final String word, final String lemma,
+      final String pos) {
+    // Note the argument order of AnalyzedToken: (token, POS tag, lemma).
+    final AnalyzedToken interpretation = new AnalyzedToken(word, pos, lemma);
+    if (null == newWdList) {
+      newWdList = new ArrayList<AnalyzedToken>();
+    }
+    newWdList.add(interpretation);
+  }
+
+  /**
+   * Routes character data to the buffer of the element currently being read.
+   *
+   * The first matching state wins, in this order: exception, token inside a
+   * pattern, match, word, disambiguation, example. Text outside all of these
+   * is dropped.
+   *
+   * @param buf character buffer supplied by the parser
+   * @param offset index of the first character to read from the buffer
+   * @param len number of characters to read
+   */
+  @Override
+  public final void characters(final char[] buf, final int offset, final int len) {
+    final String text = new String(buf, offset, len);
+    if (inException) {
+      exceptions.append(text);
+      return;
+    }
+    if (inToken && inPattern) {
+      elements.append(text);
+      return;
+    }
+    if (inMatch) {
+      match.append(text);
+      return;
+    }
+    if (inWord) {
+      wd.append(text);
+      return;
+    }
+    if (inDisambiguation) {
+      disamb.append(text);
+      return;
+    }
+    if (inExample) {
+      example.append(text);
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguator.java
new file mode 100644
index 0000000..6202dcb
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/en/EnglishRuleDisambiguator.java
@@ -0,0 +1,32 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules.en;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.AbstractRuleDisambiguator;
+
+/**
+ * Rule-based disambiguator for English.
+ *
+ * This class only tells {@link AbstractRuleDisambiguator} which language it
+ * works on; it adds no behaviour of its own.
+ */
+public class EnglishRuleDisambiguator extends AbstractRuleDisambiguator {
+
+  /**
+   * @return {@link Language#ENGLISH}
+   */
+  @Override
+  protected Language getLanguage() {
+    return Language.ENGLISH;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguator.java
new file mode 100644
index 0000000..6d81940
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/fr/FrenchRuleDisambiguator.java
@@ -0,0 +1,32 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules.fr;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.AbstractRuleDisambiguator;
+
+/**
+ * Rule-based disambiguator for French.
+ *
+ * This class only tells {@link AbstractRuleDisambiguator} which language it
+ * works on; it adds no behaviour of its own.
+ */
+public class FrenchRuleDisambiguator extends AbstractRuleDisambiguator {
+
+  /**
+   * @return {@link Language#FRENCH}
+   */
+  @Override
+  protected Language getLanguage() {
+    return Language.FRENCH;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/nl/DutchRuleDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/nl/DutchRuleDisambiguator.java
new file mode 100644
index 0000000..0d28afc
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/nl/DutchRuleDisambiguator.java
@@ -0,0 +1,32 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules.nl;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.AbstractRuleDisambiguator;
+
+/**
+ * Rule-based disambiguator for Dutch.
+ *
+ * This class only tells {@link AbstractRuleDisambiguator} which language it
+ * works on; it adds no behaviour of its own.
+ */
+public class DutchRuleDisambiguator extends AbstractRuleDisambiguator {
+
+  /**
+   * @return {@link Language#DUTCH}
+   */
+  @Override
+  protected Language getLanguage() {
+    return Language.DUTCH;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/pl/PolishRuleDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/pl/PolishRuleDisambiguator.java
new file mode 100644
index 0000000..8dc18eb
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/pl/PolishRuleDisambiguator.java
@@ -0,0 +1,31 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.disambiguation.rules.pl;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.AbstractRuleDisambiguator;
+
+/**
+ * Rule-based disambiguator for Polish.
+ *
+ * This class only tells {@link AbstractRuleDisambiguator} which language it
+ * works on; it adds no behaviour of its own.
+ */
+public class PolishRuleDisambiguator extends AbstractRuleDisambiguator {
+
+  /**
+   * @return {@link Language#POLISH}
+   */
+  @Override
+  protected Language getLanguage() {
+    return Language.POLISH;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguator.java
new file mode 100644
index 0000000..5b90bb0
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ro/RomanianRuleDisambiguator.java
@@ -0,0 +1,32 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules.ro;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.AbstractRuleDisambiguator;
+
+/**
+ * Rule-based disambiguator for Romanian.
+ *
+ * This class only tells {@link AbstractRuleDisambiguator} which language it
+ * works on; it adds no behaviour of its own.
+ */
+public class RomanianRuleDisambiguator extends AbstractRuleDisambiguator {
+
+  /**
+   * @return {@link Language#ROMANIAN}
+   */
+  @Override
+  protected Language getLanguage() {
+    return Language.ROMANIAN;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ru/RussianRuleDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ru/RussianRuleDisambiguator.java
new file mode 100644
index 0000000..f200de3
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/rules/ru/RussianRuleDisambiguator.java
@@ -0,0 +1,36 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.rules.ru;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tagging.disambiguation.rules.AbstractRuleDisambiguator;
+
+/**
+ * Rule-based disambiguator for Russian.
+ *
+ * This class only tells {@link AbstractRuleDisambiguator} which language it
+ * works on; it adds no behaviour of its own.
+ */
+public class RussianRuleDisambiguator extends AbstractRuleDisambiguator {
+
+  /**
+   * @return {@link Language#RUSSIAN}
+   */
+  @Override
+  protected Language getLanguage() {
+    return Language.RUSSIAN;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/DemoDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/DemoDisambiguator.java
new file mode 100644
index 0000000..5ebec17
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/DemoDisambiguator.java
@@ -0,0 +1,38 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.xx;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+
+/**
+ * No-op disambiguator: the analyzed sentence is handed back unchanged.
+ *
+ * @author Jozef Licko
+ */
+public class DemoDisambiguator implements Disambiguator {
+
+  /**
+   * Returns the very same sentence object that was passed in.
+   *
+   * @param input the analyzed sentence
+   * @return {@code input}, untouched
+   */
+  public final AnalyzedSentence disambiguate(final AnalyzedSentence input) {
+    return input;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/TrimDisambiguator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/TrimDisambiguator.java
new file mode 100644
index 0000000..783dcd8
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/disambiguation/xx/TrimDisambiguator.java
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.disambiguation.xx;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tagging.disambiguation.Disambiguator;
+
+/**
+ * Trivial disambiguator. Just cuts out tags from the token. It leaves only the
+ * first tag.
+ *
+ * @author Jozef Licko
+ */
+public class TrimDisambiguator implements Disambiguator {
+
+ public final AnalyzedSentence disambiguate(final AnalyzedSentence input) {
+
+ final AnalyzedTokenReadings[] anTokens = input.getTokens();
+ final AnalyzedTokenReadings[] output = new AnalyzedTokenReadings[anTokens.length];
+
+ for (int i = 0; i < anTokens.length; i++) {
+
+ if (anTokens[i].getReadingsLength() > 1) {
+ final AnalyzedToken[] firstToken = new AnalyzedToken[1];
+ firstToken[0] = anTokens[i].getAnalyzedToken(0);
+ output[i] = new AnalyzedTokenReadings(firstToken, anTokens[i].getStartPos());
+ } else {
+ output[i] = anTokens[i];
+ }
+ }
+ return new AnalyzedSentence(output);
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/en/EnglishTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/en/EnglishTagger.java
new file mode 100644
index 0000000..5bee4ad
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/en/EnglishTagger.java
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.en;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * English part-of-speech tagger.
+ * Based on part-of-speech lists in the Public Domain;
+ * see readme.txt for details. The POS tagset is
+ * described in tagset.txt.
+ *
+ * @author Marcin Milkowski
+ */
+public class EnglishTagger extends BaseTagger {
+
+  /**
+   * Creates the tagger and sets its locale to {@link Locale#ENGLISH}.
+   */
+  public EnglishTagger() {
+    setLocale(Locale.ENGLISH);
+  }
+
+  /**
+   * @return path of the English dictionary below the resource directory
+   */
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + "/en/english.dict";
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/eo/EsperantoTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/eo/EsperantoTagger.java
new file mode 100644
index 0000000..cff2e77
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/eo/EsperantoTagger.java
@@ -0,0 +1,360 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+/*
+ * Created on 01.10.2010
+ */
+package de.danielnaber.languagetool.tagging.eo;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.Tagger;
+
+/**
+ * A part-of-speech tagger for Esperanto.
+ *
+ * @author Dominique Pellé
+ */
+public class EsperantoTagger implements Tagger {
+
+ // These words don't need to be tagged.
+ private final static String wordsNotTagged[] = {
+ "ajn", "ĉi", "ĉu", "des", "do", "ja", "ju", "ke", "malpli",
+ "ne", "nek", "ol", "pli"
+ };
+
+ private final static Set setWordsNotTagged = new HashSet<String>(Arrays.asList(wordsNotTagged));
+
+ // Following preposition are never followed by accusative.
+ private final static String prepositionsNoAccusative[] = {
+ "al", "apud", "cis", "da", "de", "disde", "dum", "ekde", "el",
+ "far", "ĝis", "je", "kun", "laŭ", "malgraŭ", "na",
+ "per", "po", "post", "por", "pri", "pro", "sen", "super", "tra"
+ };
+
+ private final static Set setPrepositionsNoAccusative =
+ new HashSet<String>(Arrays.asList(prepositionsNoAccusative));
+
+ // Following preposition may be followed by accusative.
+ private final static String prepositionsAccusative[] = {
+ "anstataŭ", "en", "kontraŭ", "krom", "sur", "sub", "trans",
+ "preter", "ĉirkaŭ", "antaŭ", "malantaŭ", "ekster", "inter", "ĉe"
+ };
+
+ private final Set setPrepositionsAccusative =
+ new HashSet<String>(Arrays.asList(prepositionsAccusative));
+
+ // Conjunctions.
+ private final static String conjunctions[] = {
+ "ĉar", "kaj", "aŭ", "sed", "plus", "minus", "tamen"
+ };
+
+ private final static Set setConjunctions = new HashSet<String>(Arrays.asList(conjunctions));
+
+ // Numbers.
+ private final static String numbers[] = {
+ "nul", "unu", "du", "tri", "kvar", "kvin", "ses",
+ "sep", "ok", "naŭ", "dek", "cent", "mil"
+ };
+
+ private final static Set setNumbers = new HashSet<String>(Arrays.asList(numbers));
+
+ // Adverbs which do not end in -e
+ private final static String adverbs[] = {
+ "ankoraŭ", "almenaŭ", "apenaŭ", "baldaŭ", "preskaŭ", "eĉ",
+ "jam", "jen", "ĵus", "morgaŭ", "hodiaŭ", "hieraŭ", "nun",
+ "nur", "plu", "tre", "tro", "tuj", "for"
+ };
+
+ private final static Set setAdverbs = new HashSet<String>(Arrays.asList(adverbs));
+
+ // Set of transitive verbs and non-transitive verbs.
+ private Set setTransitiveVerbs = null;
+ private Set setNonTransitiveVerbs = null;
+
+ // Verbs always end with this pattern.
+ private final static Pattern patternVerb1 = Pattern.compile("(.*)(as|os|is|us|u|i)$");
+ private final static Pattern patternVerb2 = Pattern.compile(".*(ig|iĝ)(.s|.)$");
+
+ // Particips -ant-, -int, ont-, -it-, -it-, -ot-
+ // TODO: this is not used yet.
+ final Pattern patternParticiple =
+ Pattern.compile("(.*)([aio])(n?)t([aoe])(j?)(n?)$");
+ // Groups 11 22222 33 44444 55 66
+
+ // Pattern 'tabelvortoj'.
+ final Pattern patternTabelvorto =
+ Pattern.compile("^(i|ti|ki|ĉi|neni)((([uoae])(j?)(n?))|(am|al|es|el|om))$");
+ // Groups 111111111111111 22222222222222222222222222222222
+ // 3333333333333333 77777777777
+ // 444444 55 66
+
+ // Pattern of 'tabelvortoj' which are also tagged adverbs.
+ final Pattern patternTabelvortoAdverb =
+ Pattern.compile("(ti|i|ĉi|neni)(am|om|el|e)");
+
+ /**
+ * Load list of words from UTF-8 file (one word per line).
+ */
+ private Set loadWords(final InputStream file) throws IOException {
+ InputStreamReader isr = null;
+ BufferedReader br = null;
+ final Set<String> words = new HashSet<String>();
+ try {
+ isr = new InputStreamReader(file, "UTF-8");
+ br = new BufferedReader(isr);
+ String line;
+
+ while ((line = br.readLine()) != null) {
+ line = line.trim();
+ if (line.length() < 1) {
+ continue;
+ }
+ if (line.charAt(0) == '#') { // ignore comments
+ continue;
+ }
+ words.add(line);
+ }
+ } finally {
+ if (br != null) {
+ br.close();
+ }
+ if (isr != null) {
+ isr.close();
+ }
+ }
+ return words;
+ }
+
+ private void lazyInit() throws IOException {
+ if (setTransitiveVerbs != null) {
+ return;
+ }
+
+ // Load set of transitive and non-transitive verbs. Files don't contain
+ // verbs with suffix -iĝ or -ig since transitivity is obvious for those verbs.
+ // They also don't contain verbs with prefixes mal-, ek-, re-, mis- fi- and
+ // suffixes -ad, -aĉ, -et, -eg since these affixes never alter transitivity.
+ setTransitiveVerbs = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream("/eo/verb-tr.txt"));
+ setNonTransitiveVerbs = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream("/eo/verb-ntr.txt"));
+ }
+
+ public List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens) throws IOException {
+
+ lazyInit();
+
+ Matcher matcher;
+
+ final List<AnalyzedTokenReadings> tokenReadings =
+ new ArrayList<AnalyzedTokenReadings>();
+ int pos = 0;
+ for (String word : sentenceTokens) {
+ final List<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
+ final String lWord = word.toLowerCase();
+
+ if (lWord.equals(".")) {
+ l.add(new AnalyzedToken(word, "M fino", lWord));
+
+ } else if (lWord.equals("?")) {
+ l.add(new AnalyzedToken(word, "M fino dem", lWord));
+
+ } else if (lWord.equals("!")) {
+ l.add(new AnalyzedToken(word, "M fino kri", lWord));
+
+ } else if (lWord.equals("la")) {
+ l.add(new AnalyzedToken(word, "D", lWord));
+
+ } else if (setAdverbs.contains(lWord)) {
+ l.add(new AnalyzedToken(word, "E nak", lWord));
+
+ } else if (setWordsNotTagged.contains(lWord)) {
+ l.add(new AnalyzedToken(word, null, lWord));
+
+ // Pronouns.
+ } else if (lWord.equals("mi") || lWord.equals("ci")
+ || lWord.equals("li") || lWord.equals("ŝi")
+ || lWord.equals("ĝi") || lWord.equals("si")
+ || lWord.equals("oni")) {
+ l.add(new AnalyzedToken(word, "R nak np", lWord));
+ } else if (lWord.equals("min") || lWord.equals("cin")
+ || lWord.equals("lin") || lWord.equals("ŝin")
+ || lWord.equals("ĝin") || lWord.equals("sin")) {
+ l.add(new AnalyzedToken(word, "R akz np", lWord.substring(0, lWord.length() - 1)));
+ } else if (lWord.equals("ni") || lWord.equals("ili")) {
+ l.add(new AnalyzedToken(word, "R nak pl", lWord));
+ } else if (lWord.equals("nin") || lWord.equals("ilin")) {
+ l.add(new AnalyzedToken(word, "R akz pl", lWord.substring(0, lWord.length() - 1)));
+ } else if (lWord.equals("vi")) {
+ l.add(new AnalyzedToken(word, "R nak pn", lWord));
+ } else if (lWord.equals("vin")) {
+ l.add(new AnalyzedToken(word, "R akz pn", lWord.substring(0, lWord.length() - 1)));
+
+ // Conjunctions (kaj, sed, ...)
+ } else if (setConjunctions.contains(lWord)) {
+ l.add(new AnalyzedToken(word, "K", lWord));
+
+ // Prepositions.
+ } else if (setPrepositionsNoAccusative.contains(lWord)) {
+ l.add(new AnalyzedToken(word, "P sak", lWord));
+ } else if (setPrepositionsAccusative.contains(lWord)) {
+ l.add(new AnalyzedToken(word, "P kak", lWord));
+
+ } else if (setNumbers.contains(lWord)) {
+ l.add(new AnalyzedToken(word, "N", lWord));
+
+ // Tiu, kiu (tabelvortoj).
+ } else if ((matcher = patternTabelvorto.matcher(lWord)).find()) {
+ final String type1Group = matcher.group(1).substring(0, 1).toLowerCase();
+ final String type2Group = matcher.group(4);
+ final String plGroup = matcher.group(5);
+ final String accGroup = matcher.group(6);
+ final String type3Group = matcher.group(7);
+ final String type;
+ final String plural;
+ final String accusative;
+
+ if (accGroup == null) {
+ accusative = "xxx";
+ } else {
+ accusative = accGroup.toLowerCase().equals("n") ? "akz" : "nak";
+ }
+ if (plGroup == null) {
+ plural = " pn ";
+ } else {
+ plural = plGroup.toLowerCase().equals("j") ? " pl " : " np ";
+ }
+ type = ((type2Group == null) ? type3Group : type2Group).toLowerCase();
+
+ l.add(new AnalyzedToken(word, "T " +
+ accusative + plural + type1Group + " " + type, null));
+
+ if ((matcher = patternTabelvortoAdverb.matcher(lWord)).find()) {
+ l.add(new AnalyzedToken(word, "E nak", lWord));
+ }
+
+ // Words ending in .*oj?n? are nouns.
+ } else if (lWord.endsWith("o")) {
+ l.add(new AnalyzedToken(word, "O nak np", lWord));
+ } else if (lWord.endsWith("oj")) {
+ l.add(new AnalyzedToken(word, "O nak pl", lWord.substring(0, lWord.length() - 1)));
+ } else if (lWord.endsWith("on")) {
+ l.add(new AnalyzedToken(word, "O akz np", lWord.substring(0, lWord.length() - 1)));
+ } else if (lWord.endsWith("ojn")) {
+ l.add(new AnalyzedToken(word, "O akz pl", lWord.substring(0, lWord.length() - 2)));
+
+ // Words ending in .*aj?n? are nouns.
+ } else if (lWord.endsWith("a")) {
+ l.add(new AnalyzedToken(word, "A nak np", lWord));
+ } else if (lWord.endsWith("aj")) {
+ l.add(new AnalyzedToken(word, "A nak pl", lWord.substring(0, lWord.length() - 1)));
+ } else if (lWord.endsWith("an")) {
+ l.add(new AnalyzedToken(word, "A akz np", lWord.substring(0, lWord.length() - 1)));
+ } else if (lWord.endsWith("ajn")) {
+ l.add(new AnalyzedToken(word, "A akz pl", lWord.substring(0, lWord.length() - 2)));
+
+ // Words ending in .*en? are adverbs.
+ } else if (lWord.endsWith("e")) {
+ l.add(new AnalyzedToken(word, "E nak", lWord));
+ } else if (lWord.endsWith("en")) {
+ l.add(new AnalyzedToken(word, "E akz", lWord.substring(0, lWord.length() - 1)));
+
+ // Verbs.
+ } else if ((matcher = patternVerb1.matcher(lWord)).find()) {
+ final String verb = matcher.group(1) + "i";
+ final String tense = matcher.group(2);
+ final String transitive;
+
+ final Matcher matcher2 = patternVerb2.matcher(lWord);
+ if (matcher2.find()) {
+ transitive = matcher2.group(1).equals("ig") ? "tr" : "nt";
+ } else {
+ final boolean isTransitive = setTransitiveVerbs.contains(verb);
+ final boolean isIntransitive = setNonTransitiveVerbs.contains(verb);
+
+ if (isTransitive) {
+ transitive = isIntransitive ? "tn" : "tr";
+ } else {
+ transitive = isIntransitive ? "nt" : "tn";
+ }
+ }
+ l.add(new AnalyzedToken(word, "V " + transitive + " " + tense, verb));
+
+ // Irregular word (no tag).
+ } else {
+ l.add(new AnalyzedToken(word, null, null));
+ }
+
+ // Participle (can be combined with other tags).
+ if ((matcher = patternParticiple.matcher(lWord)).find()) {
+ final String verb = matcher.group(1) + "i";
+ final String aio = matcher.group(2);
+ final String antAt = matcher.group(3).equals("n") ? "n" : "-";
+ final String aoe = matcher.group(4);
+ final String plural = matcher.group(5).equals("j") ? "pl" : "np";
+ final String accusative = matcher.group(6).equals("n") ? "akz" : "nak";
+ final String transitive;
+
+ final Matcher matcher2 = patternVerb2.matcher(lWord);
+ if (matcher2.find()) {
+ transitive = matcher2.group(1).equals("ig") ? "tr" : "nt";
+ } else {
+ final boolean isTransitive = setTransitiveVerbs.contains(verb);
+ final boolean isIntransitive = setNonTransitiveVerbs.contains(verb);
+
+ if (isTransitive) {
+ transitive = isIntransitive ? "tn" : "tr";
+ } else {
+ transitive = isIntransitive ? "nt" : "tn";
+ }
+ }
+ l.add(new AnalyzedToken(word, "C " + accusative + " " + plural + " " +
+ transitive + " " + aio + " " + antAt + " " + aoe,
+ verb));
+ }
+
+ pos += word.length();
+ tokenReadings.add(new AnalyzedTokenReadings(
+ l.toArray(new AnalyzedToken[0]), 0));
+ }
+ return tokenReadings;
+ }
+
+ public AnalyzedTokenReadings createNullToken(String token, int startPos) {
+ return new AnalyzedTokenReadings(
+ new AnalyzedToken(token, null, null), startPos);
+ }
+
+ public AnalyzedToken createToken(String token, String posTag) {
+ return new AnalyzedToken(token, posTag, null);
+ }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/es/SpanishTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/es/SpanishTagger.java
new file mode 100644
index 0000000..fa0700c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/es/SpanishTagger.java
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.es;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Spanish tagger.
+ *
+ * Based on the FreeLing tagger dictionary and a Spanish Wikipedia
+ * corpus tagged with FreeLing.
+ *
+ * @author Marcin Milkowski
+ */
+public class SpanishTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/es/spanish.dict";
+
+  /** Creates the tagger and passes the {@code es} locale to {@code setLocale}. */
+  public SpanishTagger() {
+    setLocale(new Locale("es"));
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /es/spanish.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/fr/FrenchTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/fr/FrenchTagger.java
new file mode 100644
index 0000000..cbdea19
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/fr/FrenchTagger.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.fr;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * French tagger.
+ *
+ * Based on inDICO, implemented in FSA.
+ *
+ * @author Marcin Milkowski
+ */
+public class FrenchTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/fr/french.dict";
+
+  /** Creates the tagger and passes {@link Locale#FRENCH} to {@code setLocale}. */
+  public FrenchTagger() {
+    setLocale(Locale.FRENCH);
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /fr/french.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/gl/GalicianTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/gl/GalicianTagger.java
new file mode 100644
index 0000000..9827d5d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/gl/GalicianTagger.java
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.gl;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Galician part-of-speech tagger.
+ * Based on the English tagger.
+ *
+ * @author Marcin Milkowski
+ *
+ * modified by Susana Sotelo Docio
+ */
+public class GalicianTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/gl/galician.dict";
+
+  /** Creates the tagger and passes the {@code gl} locale to {@code setLocale}. */
+  public GalicianTagger() {
+    setLocale(new Locale("gl"));
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /gl/galician.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/it/ItalianTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/it/ItalianTagger.java
new file mode 100644
index 0000000..5ae55ac
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/it/ItalianTagger.java
@@ -0,0 +1,46 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.it;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Italian tagger.
+ *
+ * Uses the morph-it! lexicon compiled by Marco Baroni and Eros Zanchetta;
+ * see resource/it/readme-morph-it.txt for the tagset.
+ *
+ * @author Marcin Milkowski
+ */
+public class ItalianTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/it/italian.dict";
+
+  /** Creates the tagger and passes {@link Locale#ITALIAN} to {@code setLocale}. */
+  public ItalianTagger() {
+    setLocale(Locale.ITALIAN);
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /it/italian.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ml/MalayalamTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ml/MalayalamTagger.java
new file mode 100644
index 0000000..b15ee58
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ml/MalayalamTagger.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Daniel Naber, Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.ml;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Malayalam part-of-speech tagger.
+ *
+ * @author Marcin Milkowski
+ */
+public class MalayalamTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/ml/malayalam.dict";
+
+  /** Creates the tagger and passes the {@code ml} locale to {@code setLocale}. */
+  public MalayalamTagger() {
+    setLocale(new Locale("ml"));
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /ml/malayalam.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
+
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nb/BokmalTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nb/BokmalTagger.java
new file mode 100644
index 0000000..f7a5e09
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nb/BokmalTagger.java
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.nb;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Bokmål part-of-speech tagger.
+ * Based on part-of-speech lists in the Public Domain;
+ * see readme.txt for details. The POS tagset is
+ * described in tagset.txt.
+ *
+ * @author Marcin Milkowski
+ */
+public class BokmalTagger extends BaseTagger {
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /nb/bokmal.dict}
+   */
+  public final String getFileName() {
+    return JLanguageTool.getDataBroker().getResourceDir() + "/nb/bokmal.dict";
+  }
+
+  /**
+   * Creates the tagger and passes the Norwegian Bokmål ({@code nb}) locale
+   * to {@code setLocale}.
+   */
+  public BokmalTagger() {
+    super();
+    // Was Locale.FRENCH, a leftover from the French tagger this class was
+    // copied from; the locale must match the language of the dictionary.
+    setLocale(new Locale("nb"));
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nl/DutchTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nl/DutchTagger.java
new file mode 100644
index 0000000..ece7dee
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/nl/DutchTagger.java
@@ -0,0 +1,41 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.nl;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Dutch part-of-speech tagger.
+ *
+ * @author Marcin Milkowski
+ */
+public class DutchTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/nl/dutch.dict";
+
+  /** Creates the tagger and passes the {@code nl} locale to {@code setLocale}. */
+  public DutchTagger() {
+    setLocale(new Locale("nl"));
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /nl/dutch.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/pl/PolishTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/pl/PolishTagger.java
new file mode 100644
index 0000000..881655e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/pl/PolishTagger.java
@@ -0,0 +1,117 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.pl;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Polish POS tagger based on FSA morphological dictionaries.
+ *
+ * Each word is looked up as written and in lower case; when neither lookup
+ * yields anything and the word is all lower case, a variant with an
+ * upper-cased first character is tried as a last resort.
+ *
+ * @author Marcin Milkowski
+ */
+
+public class PolishTagger extends BaseTagger {
+
+  private static final String RESOURCE_FILENAME = "/pl/polish.dict";
+  /** Dictionary lookup, created on the first call to {@link #tag(List)}. */
+  private IStemmer morfologik;
+  private final Locale plLocale = new Locale("pl");
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /pl/polish.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+
+  /**
+   * Tags every token of a sentence.
+   *
+   * @param sentenceTokens the tokens of one sentence, in order
+   * @return one readings object per token; its position is the summed
+   *         length of all preceding tokens
+   * @throws IOException declared for dictionary loading
+   */
+  @Override
+  public final List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens)
+      throws IOException {
+    // Lazy init: the dictionary is read once and then cached in the field.
+    if (morfologik == null) {
+      final URL dictUrl =
+          JLanguageTool.getDataBroker().getFromResourceDirAsUrl(RESOURCE_FILENAME);
+      morfologik = new DictionaryLookup(Dictionary.read(dictUrl));
+    }
+
+    final List<AnalyzedTokenReadings> result = new ArrayList<AnalyzedTokenReadings>();
+    int offset = 0;
+
+    for (String word : sentenceTokens) {
+      final List<AnalyzedToken> readings = new ArrayList<AnalyzedToken>();
+      final String lowerWord = word.toLowerCase(plLocale);
+      // Each lookup result is converted right away, before the next lookup.
+      final List<AnalyzedToken> exactMatches =
+          asAnalyzedTokenList(word, morfologik.lookup(word));
+      final List<AnalyzedToken> lowerMatches =
+          asAnalyzedTokenList(word, morfologik.lookup(lowerWord));
+      final boolean isLowercase = word.equals(lowerWord);
+
+      // Readings for the word exactly as written.
+      addTokens(exactMatches, readings);
+
+      // Readings for the lower-cased form, unless it is the same string.
+      if (!isLowercase) {
+        addTokens(lowerMatches, readings);
+      }
+
+      if (lowerMatches.isEmpty() && exactMatches.isEmpty()) {
+        boolean foundUppercase = false;
+        if (isLowercase) {
+          // Last resort: the word with its first character upper-cased.
+          final List<AnalyzedToken> upperMatches = asAnalyzedTokenList(word,
+              morfologik.lookup(StringTools.uppercaseFirstChar(word)));
+          foundUppercase = !upperMatches.isEmpty();
+          if (foundUppercase) {
+            addTokens(upperMatches, readings);
+          }
+        }
+        if (!foundUppercase) {
+          // Unknown word: a single reading without tag and lemma.
+          readings.add(new AnalyzedToken(word, null, null));
+        }
+      }
+
+      result.add(new AnalyzedTokenReadings(readings, offset));
+      offset += word.length();
+    }
+
+    return result;
+  }
+
+  /**
+   * Appends the given tokens to {@code target}, splitting a POS tag of the
+   * form {@code a+b} into one reading per part. A {@code null} source list
+   * is ignored.
+   */
+  private void addTokens(final List<AnalyzedToken> source,
+      final List<AnalyzedToken> target) {
+    if (source == null) {
+      return;
+    }
+    for (AnalyzedToken analyzed : source) {
+      for (final String singleTag
+          : StringTools.asString(analyzed.getPOSTag()).split("\\+")) {
+        target.add(new AnalyzedToken(analyzed.getToken(), singleTag,
+            analyzed.getLemma()));
+      }
+    }
+  }
+
+
+} \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ro/RomanianTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ro/RomanianTagger.java
new file mode 100644
index 0000000..76ffe2e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ro/RomanianTagger.java
@@ -0,0 +1,102 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ro;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Romanian part-of-speech tagger.
+ *
+ * Words are lower-cased with the Romanian locale before the dictionary
+ * lookup; the readings keep the word as it was written.
+ *
+ * @author Ionuț Păduraru
+ */
+public class RomanianTagger extends BaseTagger {
+
+  /** Dictionary location relative to the resource directory; replaceable via the constructor. */
+  private String RESOURCE_FILENAME = "/ro/romanian.dict";
+
+  /** Dictionary lookup, created on the first call to {@link #tag(List)}. */
+  private IStemmer morfologik;
+  private static final Locale roLocale = new Locale("ro");
+
+  /** Creates a tagger that uses the default dictionary {@code /ro/romanian.dict}. */
+  public RomanianTagger() {
+    setLocale(roLocale);
+  }
+
+  /**
+   * Creates a tagger that uses another dictionary.
+   *
+   * @param fileName dictionary location, relative to the resource directory
+   */
+  public RomanianTagger(final String fileName) {
+    RESOURCE_FILENAME = fileName;
+    setLocale(roLocale);
+  }
+
+  /**
+   * @return the data broker's resource directory followed by the
+   *         configured dictionary location
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+
+  /**
+   * Tags every token of a sentence.
+   *
+   * @param sentenceTokens the tokens of one sentence, in order
+   * @return one readings object per token; its position is the summed
+   *         length of all preceding tokens
+   * @throws IOException declared for dictionary loading
+   */
+  @Override
+  public final List<AnalyzedTokenReadings> tag(
+      final List<String> sentenceTokens) throws IOException {
+    // Lazy init: the dictionary is read once and then cached in the field.
+    if (morfologik == null) {
+      final URL dictUrl =
+          JLanguageTool.getDataBroker().getFromResourceDirAsUrl(RESOURCE_FILENAME);
+      morfologik = new DictionaryLookup(Dictionary.read(dictUrl));
+    }
+
+    final List<AnalyzedTokenReadings> result = new ArrayList<AnalyzedTokenReadings>();
+    int offset = 0;
+
+    for (final String word : sentenceTokens) {
+      final List<AnalyzedToken> readings = new ArrayList<AnalyzedToken>();
+      final List<WordData> matches = morfologik.lookup(word.toLowerCase(roLocale));
+
+      if (matches == null || matches.isEmpty()) {
+        // Unknown word: a single reading without tag and lemma.
+        readings.add(new AnalyzedToken(word, null, null));
+      } else {
+        for (WordData match : matches) {
+          // A stem of the form a+b yields one reading per part, all
+          // sharing the tag of the dictionary entry.
+          for (final String lemma : match.getStem().toString().split("\\+")) {
+            readings.add(new AnalyzedToken(word, match.getTag().toString(), lemma));
+          }
+        }
+      }
+
+      final AnalyzedToken[] readingArray =
+          readings.toArray(new AnalyzedToken[readings.size()]);
+      result.add(new AnalyzedTokenReadings(readingArray, offset));
+      offset += word.length();
+    }
+
+    return result;
+
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ru/RussianTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ru/RussianTagger.java
new file mode 100644
index 0000000..b31d29b
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/ru/RussianTagger.java
@@ -0,0 +1,42 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.ru;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Russian part-of-speech tagger.
+ * The Russian dictionary was originally developed by www.aot.ru and is
+ * licensed under LGPL; see readme.txt for details. The POS tagset is
+ * described in russian_tags.txt.
+ */
+public class RussianTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/ru/russian.dict";
+
+  /** Creates the tagger and passes the {@code ru} locale to {@code setLocale}. */
+  public RussianTagger() {
+    setLocale(new Locale("ru"));
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /ru/russian.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sk/SlovakTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sk/SlovakTagger.java
new file mode 100644
index 0000000..cd43a7c
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sk/SlovakTagger.java
@@ -0,0 +1,40 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.sk;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Slovak part-of-speech tagger, based on the Russian part-of-speech tagger.
+ *
+ * @author Zdenko Podobný
+ */
+
+public class SlovakTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/sk/slovak.dict";
+
+  /** Creates the tagger and passes the {@code sk} locale to {@code setLocale}. */
+  public SlovakTagger() {
+    setLocale(new Locale("sk"));
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /sk/slovak.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sv/SwedishTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sv/SwedishTagger.java
new file mode 100644
index 0000000..78bad25
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/sv/SwedishTagger.java
@@ -0,0 +1,39 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.sv;
+
+import java.util.Locale;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Swedish part-of-speech tagger.
+ * Based on DSSO. Will be expanded upon.
+ */
+public class SwedishTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/sv/swedish.dict";
+
+  /** Creates the tagger and passes the {@code sv} locale to {@code setLocale}. */
+  public SwedishTagger() {
+    setLocale(new Locale("sv"));
+  }
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /sv/swedish.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/IPOSTag.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/IPOSTag.java
new file mode 100644
index 0000000..d31cd4d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/IPOSTag.java
@@ -0,0 +1,30 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.uk;
+
+/**
+ * String constants naming the part-of-speech tags used in the Ukrainian
+ * tagging package. Interface fields are implicitly
+ * {@code public static final}, so the modifiers are not repeated.
+ */
+public interface IPOSTag {
+
+  /** Adjective. */
+  String TAG_ADJ = "adj";
+  /** Noun. */
+  String TAG_NOUN = "noun";
+  /** Plural marker. */
+  String TAG_PLURAL = "plural";
+  /** Verb. */
+  String TAG_VERB = "verb";
+  /** Reflexive marker. */
+  String TAG_REFL = "refl";
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMorfoTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMorfoTagger.java
new file mode 100644
index 0000000..350b4ba
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMorfoTagger.java
@@ -0,0 +1,35 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.uk;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.BaseTagger;
+
+/**
+ * Ukrainian part-of-speech tagger backed by the dictionary file
+ * {@code /uk/ukrainian.dict}.
+ *
+ * @author Adriy Rysin
+ */
+public class UkrainianMorfoTagger extends BaseTagger {
+
+  /** Dictionary location, relative to the resource directory. */
+  private static final String RESOURCE_FILENAME = "/uk/ukrainian.dict";
+
+  /**
+   * @return the data broker's resource directory followed by
+   *         {@code /uk/ukrainian.dict}
+   */
+  @Override
+  public final String getFileName() {
+    final String resourceDir = JLanguageTool.getDataBroker().getResourceDir();
+    return resourceDir + RESOURCE_FILENAME;
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMyspellTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMyspellTagger.java
new file mode 100644
index 0000000..aa6bf8e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianMyspellTagger.java
@@ -0,0 +1,149 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tagging.uk;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.tagging.Tagger;
+
+/**
+ * Ukrainian Part-of-speech tagger. This class uses myspell uk_UA.dic dictionary
+ * file to assign tags to words. It only supports lemmas and three main parts of
+ * speech: noun, verb and adjective
+ *
+ * The dictionary is read on the first call to {@link #tag(List)} and kept
+ * for later calls.
+ *
+ * @author Adriy Rysin
+ */
+public class UkrainianMyspellTagger implements Tagger {
+
+  private static final String RESOURCE_FILENAME = "/uk/ukrainian.dict";
+
+  /** Locale for lower-casing, so results do not depend on the JVM default locale. */
+  private static final java.util.Locale UK_LOCALE = new java.util.Locale("uk");
+
+  /** Word to POS tags; {@code null} until the dictionary has been read completely. */
+  private HashMap<String, String[]> wordsToPos;
+
+
+  /**
+   * Tags every token of a sentence. A word is looked up as written first;
+   * if that fails and the word contains upper-case characters, its
+   * lower-cased form is looked up. Words found in neither form get a
+   * single reading without tag and lemma.
+   *
+   * @param sentenceTokens the tokens of one sentence, in order
+   * @return one readings object per token; its position is the summed
+   *         length of all preceding tokens
+   * @throws IOException if the dictionary cannot be read
+   */
+  public final List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens)
+      throws IOException {
+
+    if (wordsToPos == null) {
+      // Assigned only after a complete read, so a failed load is retried on
+      // the next call instead of leaving a partial dictionary behind.
+      wordsToPos = loadDictionary();
+    }
+
+    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
+    int pos = 0;
+
+    for (final String word : sentenceTokens) {
+      final List<AnalyzedToken> analyzedTokens = new ArrayList<AnalyzedToken>();
+
+      final String[] posTags = wordsToPos.get(word);
+      String[] lowerPosTags = null;
+
+      if (posTags != null) {
+        for (final String posTag : posTags) {
+          analyzedTokens.add(new AnalyzedToken(word, posTag, word));
+        }
+      } else {
+        final String lowerWord = word.toLowerCase(UK_LOCALE);
+        if (!word.equals(lowerWord)) {
+          lowerPosTags = wordsToPos.get(lowerWord);
+          if (lowerPosTags != null) {
+            for (final String lowerPosTag : lowerPosTags) {
+              analyzedTokens.add(new AnalyzedToken(lowerWord, lowerPosTag,
+                  lowerWord));
+            }
+          }
+        }
+      }
+
+      if (posTags == null && lowerPosTags == null) {
+        analyzedTokens.add(new AnalyzedToken(word, null, null));
+      }
+
+      tokenReadings.add(new AnalyzedTokenReadings(analyzedTokens
+          .toArray(new AnalyzedToken[analyzedTokens.size()]), pos));
+      pos += word.length();
+    }
+
+    return tokenReadings;
+  }
+
+  /**
+   * Reads the dictionary resource into a new word-to-tags map. Only lines
+   * of the form {@code word/flags} whose flags map to at least one tag
+   * produce an entry. The reader is closed even when reading fails.
+   */
+  private static HashMap<String, String[]> loadDictionary() throws IOException {
+    final HashMap<String, String[]> dictionary = new HashMap<String, String[]>();
+    final InputStream resourceFile =
+        JLanguageTool.getDataBroker().getFromResourceDirAsStream(RESOURCE_FILENAME);
+    final BufferedReader input = new BufferedReader(new InputStreamReader(
+        resourceFile, Charset.forName("UTF-8")));
+    try {
+      String line;
+      while ((line = input.readLine()) != null) {
+        line = line.trim();
+        // NOTE(review): String.matches tests the whole line, so this skips
+        // only lines consisting of a single digit. Longer numeric lines
+        // contain no '/' and are dropped by the length check below.
+        if (line.matches("^[0-9]") || line.length() == 0) {
+          continue;
+        }
+
+        final String[] wrd = line.split("/");
+        if (wrd.length > 1) {
+          final List<String> posTags = posTagsForFlags(wrd[1]);
+          if (posTags.size() > 0) {
+            dictionary.put(wrd[0], posTags.toArray(new String[0]));
+          }
+        }
+      }
+    } finally {
+      input.close();
+    }
+    return dictionary;
+  }
+
+  /**
+   * Maps a dictionary flag string to POS tags: flags made only of
+   * {@code a-p} give a noun (plus plural for exactly {@code "b"}), flags
+   * made only of {@code A-N} give a verb (plus reflexive when all are among
+   * {@code BDFHJLN}), flags made only of {@code U}/{@code V} give an
+   * adjective. Anything else yields an empty list.
+   */
+  private static List<String> posTagsForFlags(final String flags) {
+    final List<String> posTags = new ArrayList<String>();
+
+    if (flags.matches("[abcdefghijklmnop]+")) {
+      posTags.add(IPOSTag.TAG_NOUN);
+      if (flags.equals("b")) {
+        posTags.add(IPOSTag.TAG_PLURAL);
+      }
+    } else if (flags.matches("[ABCDEFGHIJKLMN]+")) {
+      posTags.add(IPOSTag.TAG_VERB);
+      if (flags.matches("^[BDFHJLN]+")) {
+        posTags.add(IPOSTag.TAG_REFL);
+      }
+    } else if (flags.matches("[UV]+")) {
+      posTags.add(IPOSTag.TAG_ADJ);
+    }
+
+    return posTags;
+  }
+
+  /**
+   * Builds token readings that hold a single reading of {@code token}
+   * with neither a POS tag nor a lemma.
+   */
+  public final AnalyzedTokenReadings createNullToken(final String token, final int startPos) {
+    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
+  }
+
+  /** Builds a token that carries the given POS tag and no lemma. */
+  public AnalyzedToken createToken(String token, String posTag) {
+    return new AnalyzedToken(token, posTag, null);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianTagger.java
new file mode 100644
index 0000000..5785766
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/uk/UkrainianTagger.java
@@ -0,0 +1,26 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tagging.uk;
+
+import de.danielnaber.languagetool.tagging.Tagger;
+
+/**
+ * Tagger for Ukrainian. Declares no members of its own; all behavior is
+ * inherited from {@link UkrainianMyspellTagger}.
+ */
+public class UkrainianTagger extends UkrainianMyspellTagger implements Tagger {
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/xx/DemoTagger.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/xx/DemoTagger.java
new file mode 100644
index 0000000..031f45d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tagging/xx/DemoTagger.java
@@ -0,0 +1,63 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+/*
+ * Created on 23.12.2005
+ */
+package de.danielnaber.languagetool.tagging.xx;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tagging.Tagger;
+
+/**
+ * A trivial tagger that does nothing than assign null
+ * tags to words.
+ *
+ * @author Daniel Naber
+ */
+public class DemoTagger implements Tagger {
+
+  /**
+   * Assigns one untagged reading to every token.
+   *
+   * @param sentenceTokens the tokens of one sentence, in order
+   * @return one {@link AnalyzedTokenReadings} per input token; each start position
+   *         is the summed length of all preceding tokens
+   */
+  public List<AnalyzedTokenReadings> tag(final List<String> sentenceTokens) {
+    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
+    int pos = 0;
+    for (final String word : sentenceTokens) {
+      // a real tagger would need to assign a POS tag
+      // in the next line instead of null:
+      final AnalyzedToken untagged = new AnalyzedToken(word, null, null);
+      // Pass the running offset (previously a constant 0 was passed and the
+      // computed offset was never used), then advance it past this token.
+      tokenReadings.add(new AnalyzedTokenReadings(new AnalyzedToken[] {untagged}, pos));
+      pos += word.length();
+    }
+    return tokenReadings;
+  }
+
+  /**
+   * Wraps a token in readings that carry a single untagged analysis.
+   *
+   * @param token the token text
+   * @param startPos start position handed on to the {@link AnalyzedTokenReadings}
+   * @return readings holding one untagged {@link AnalyzedToken}
+   */
+  public AnalyzedTokenReadings createNullToken(String token, int startPos) {
+    return new AnalyzedTokenReadings(new AnalyzedToken(token, null, null), startPos);
+  }
+
+  /**
+   * Creates a token analysis with the given POS tag.
+   *
+   * @param token the token text
+   * @param posTag the POS tag to attach
+   * @return a new {@link AnalyzedToken}
+   */
+  public AnalyzedToken createToken(String token, String posTag) {
+    return new AnalyzedToken(token, posTag, null);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizer.java
new file mode 100644
index 0000000..dc11420
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SRXSentenceTokenizer.java
@@ -0,0 +1,99 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import net.sourceforge.segment.TextIterator;
+import net.sourceforge.segment.srx.SrxDocument;
+import net.sourceforge.segment.srx.SrxParser;
+import net.sourceforge.segment.srx.SrxTextIterator;
+import net.sourceforge.segment.srx.io.Srx2Parser;
+import de.danielnaber.languagetool.JLanguageTool;
+
+/**
+ * Class to tokenize sentences using an SRX file.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public class SRXSentenceTokenizer extends SentenceTokenizer {
+
+  // Reader over the SRX rule file; kept in a field so finalize() can close it.
+  private BufferedReader srxReader;
+  // Parsed segmentation rules, reused by every tokenize() call on this instance.
+  private final SrxDocument document;
+  private final String language;
+  // Either "_one" or "_two"; appended to the language code in tokenize().
+  private String parCode;
+
+  /** Name of the SRX rule file, looked up in the resource directory. */
+  static final String RULES = "/segment.srx";
+
+  /**
+   * Opens and parses the SRX rule file {@link #RULES}.
+   *
+   * @param language language code; concatenated with the paragraph suffix and
+   *        handed to the SRX text iterator
+   * @throws RuntimeException if the rule file cannot be opened; the underlying
+   *         exception is attached as the cause
+   */
+  public SRXSentenceTokenizer(final String language) {
+    this.language = language;
+    try {
+      srxReader = new BufferedReader(new InputStreamReader(
+          JLanguageTool.getDataBroker().getFromResourceDirAsStream(RULES), "utf-8"));
+    } catch (Exception e) {
+      // Keep the original exception as the cause so the real reason is not lost.
+      throw new RuntimeException("Could not load rules " + RULES + " from resource dir "
+          + JLanguageTool.getDataBroker().getResourceDir(), e);
+    }
+    final SrxParser srxParser = new Srx2Parser();
+    document = srxParser.parse(srxReader);
+    setSingleLineBreaksMarksParagraph(false);
+  }
+
+  /**
+   * Splits the text into segments using the parsed SRX rules.
+   *
+   * @param text the text to segment
+   * @return the segments in the order the SRX iterator yields them
+   */
+  @Override
+  public final List<String> tokenize(final String text) {
+    final List<String> segments = new ArrayList<String>();
+    final TextIterator textIterator = new SrxTextIterator(document, language
+        + parCode, text);
+    while (textIterator.hasNext()) {
+      segments.add(textIterator.next());
+    }
+    return segments;
+  }
+
+  /**
+   * @return <code>true</code> if the paragraph suffix currently in use is "_one"
+   */
+  @Override
+  public final boolean singleLineBreaksMarksPara() {
+    return "_one".equals(parCode);
+  }
+
+  /**
+   * @param lineBreakParagraphs
+   *          if <code>true</code>, single line breaks are assumed to end a
+   *          paragraph, with <code>false</code>, only two or more consecutive
+   *          line breaks end a paragraph
+   */
+  @Override
+  public final void setSingleLineBreaksMarksParagraph(
+      final boolean lineBreakParagraphs) {
+    if (lineBreakParagraphs) {
+      parCode = "_one";
+    } else {
+      parCode = "_two";
+    }
+  }
+
+  /** Closes the rule file reader, if one was opened. */
+  @Override
+  protected final void finalize() throws Throwable {
+    if (srxReader != null) {
+      srxReader.close();
+    }
+    super.finalize();
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SentenceTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SentenceTokenizer.java
new file mode 100644
index 0000000..55d1ec6
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/SentenceTokenizer.java
@@ -0,0 +1,250 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Tokenizes text into sentences by looking for typical end-of-sentence markers,
+ * but considering exceptions (e.g. abbreviations).
+ *
+ * @author Daniel Naber
+ */
+public class SentenceTokenizer implements Tokenizer {
+
+ // end of sentence marker:
+ protected static final String EOS = "\0";
+ //private final static String EOS = "#"; // for testing only
+ protected static final String P = "[\\.!?…]"; // PUNCTUATION
+ // One optional closing quote or bracket directly after the punctuation. The
+ // closing curly quote had been lost here, leaving an empty alternative; it is
+ // restored to match the copy of this expression in CzechSentenceTokenizer.
+ protected static final String AP = "(?:'|«|\"|”|\\)|\\]|\\})?"; // AFTER PUNCTUATION
+ protected static final String PAP = P + AP;
+ protected static final String PARENS = "[\\(\\)\\[\\]]"; // parentheses
+
+ // Check out the private methods for comments and examples about these
+ // regular expressions:
+
+ // Paragraph pattern currently in effect; chosen in setSingleLineBreaksMarksParagraph().
+ private Pattern paragraph;
+ private static final Pattern paragraphByTwoLineBreaks = Pattern.compile("([\\n\\r]\\s*[\\n\\r])");
+ private static final Pattern paragraphByLineBreak = Pattern.compile("([\\n\\r])");
+
+ // add unbreakable field, for example footnote, if it's at the end of the sentence
+ private static final Pattern punctWhitespace = Pattern.compile("(" + PAP + "(\u0002)?\\s)");
+ // \p{Lu} = uppercase, with obeying Unicode (\p{Upper} is just US-ASCII!):
+ private static final Pattern punctUpperLower = Pattern.compile("(" + PAP
+ + ")([\\p{Lu}][^\\p{Lu}.])");
+ private static final Pattern letterPunct = Pattern.compile("(\\s[\\wüöäÜÖÄß]" + P + ")");
+ // abbrev1..abbrev8 match false breaks; each one ends with the end-of-sentence marker.
+ private static final Pattern abbrev1 = Pattern.compile("([^-\\wüöäÜÖÄß][\\wüöäÜÖÄß]" + PAP + "\\s)" + EOS);
+ private static final Pattern abbrev2 = Pattern.compile("([^-\\wüöäÜÖÄß][\\wüöäÜÖÄß]" + P + ")" + EOS);
+ private static final Pattern abbrev3 = Pattern.compile("(\\s[\\wüöäÜÖÄß]\\.\\s+)" + EOS);
+ private static final Pattern abbrev4 = Pattern.compile("(\\.\\.\\. )" + EOS + "([\\p{Ll}])");
+ private static final Pattern abbrev5 = Pattern.compile("(['\"]" + P + "['\"]\\s+)" + EOS);
+ private static final Pattern abbrev6 = Pattern.compile("([\"']\\s*)" + EOS + "(\\s*[\\p{Ll}])");
+ private static final Pattern abbrev7 = Pattern.compile("(\\s" + PAP + "\\s)" + EOS);
+ // e.g. 3.10. (in a date):
+ private static final Pattern abbrev8 = Pattern.compile("(\\d{1,2}\\.\\d{1,2}\\.\\s+)" + EOS);
+ // repair1..repair3 match places where a break is inserted afterwards.
+ private static final Pattern repair1 = Pattern.compile("('[\\wüöäÜÖÄß]" + P + ")(\\s)");
+ private static final Pattern repair2 = Pattern.compile("(\\sno\\.)(\\s+)(?!\\d)");
+ private static final Pattern repair3 = Pattern.compile("([ap]\\.m\\.\\s+)([\\p{Lu}])");
+
+ // repair10..repair12 match a break after bracketed text; the break is removed again.
+ private static final Pattern repair10 = Pattern.compile("([\\(\\[])([!?]+)([\\]\\)]) " + EOS);
+ private static final Pattern repair11 = Pattern.compile("([!?]+)([\\)\\]]) " + EOS);
+ private static final Pattern repair12 = Pattern.compile("(" + PARENS + ") " + EOS);
+
+ // some abbreviations:
+ private static final String[] ABBREV_LIST = {
+ // English -- but these work globally for all languages:
+ "Mr", "Mrs", "No", "pp", "St", "no",
+ "Sr", "Jr", "Bros", "etc", "vs", "esp", "Fig", "fig", "Jan", "Feb", "Mar", "Apr", "Jun", "Jul",
+ "Aug", "Sep", "Sept", "Oct", "Okt", "Nov", "Dec", "Ph.D", "PhD",
+ "al", // in "et al."
+ "cf", "Inc", "Ms", "Gen", "Sen", "Prof", "Corp", "Co"
+ };
+
+ // One compiled pattern per abbreviation; filled in by the constructor.
+ private final Set<Pattern> abbreviationPatterns = new HashSet<Pattern>();
+
+ /**
+ * Month names like "Dezember" that should not be considered a sentence
+ * boundary in string like "13. Dezember". May also contain other
+ * words that indicate there's no sentence boundary when preceded
+ * by a number and a dot. May be left <code>null</code>, in which case
+ * the corresponding repair step is skipped.
+ */
+ protected String[] monthNames;
+
+ /**
+  * Creates a sentence tokenizer that knows only the built-in abbreviations.
+  */
+ public SentenceTokenizer() {
+   this(new String[0]);
+ }
+
+ /**
+  * Creates a sentence tokenizer that knows the given abbreviations in addition
+  * to the built-in ones. Each abbreviation is inserted into a regular
+  * expression as-is, so regex metacharacters in it keep their special meaning.
+  *
+  * @param abbrevList additional abbreviations, without the trailing dot
+  */
+ public SentenceTokenizer(final String[] abbrevList) {
+   final List<String> combined = new ArrayList<String>(Arrays.asList(abbrevList));
+   combined.addAll(Arrays.asList(ABBREV_LIST));
+   for (final String abbreviation : combined) {
+     abbreviationPatterns.add(Pattern.compile("(\\b" + abbreviation + PAP + "\\s)" + EOS));
+   }
+   setSingleLineBreaksMarksParagraph(false);
+ }
+
+ /**
+  * Selects which line-break pattern ends a paragraph.
+  *
+  * @param lineBreakParagraphs if <code>true</code>, a single line break is assumed to end a
+  *        paragraph; with <code>false</code>, only two or more consecutive line breaks do
+  */
+ public void setSingleLineBreaksMarksParagraph(final boolean lineBreakParagraphs) {
+   paragraph = lineBreakParagraphs ? paragraphByLineBreak : paragraphByTwoLineBreaks;
+ }
+
+ /**
+  * @return <code>true</code> if the single-line-break paragraph pattern is the one in effect
+  */
+ public boolean singleLineBreaksMarksPara() {
+   return paragraphByLineBreak == paragraph;
+ }
+
+ /**
+  * Splits the given string into sentences. Break markers are inserted,
+  * false ones removed, missing ones added, and the result is cut at the
+  * remaining markers. Empty pieces between adjacent markers are not returned.
+  *
+  * @param s the text to split
+  * @return the sentences in text order
+  */
+ public List<String> tokenize(String s) {
+   final String marked = splitUnsplitStuff(removeFalseEndOfSentence(firstSentenceSplitting(s)));
+   final List<String> sentences = new ArrayList<String>();
+   for (final StringTokenizer parts = new StringTokenizer(marked, EOS); parts.hasMoreTokens();) {
+     sentences.add(parts.nextToken());
+   }
+   return sentences;
+ }
+
+ /**
+ * Add a special break character at all places with typical sentence delimiters.
+ */
+ private String firstSentenceSplitting(String s) {
+ // Double new-line means a new sentence:
+ s = paragraph.matcher(s).replaceAll("$1" + EOS);
+ // Punctuation followed by whitespace means a new sentence:
+ s = punctWhitespace.matcher(s).replaceAll("$1" + EOS);
+ // New (compared to the perl module): Punctuation followed by uppercase followed
+ // by non-uppercase character (except dot) means a new sentence:
+ s = punctUpperLower.matcher(s).replaceAll("$1" + EOS + "$2");
+ // Break also when single letter comes before punctuation:
+ s = letterPunct.matcher(s).replaceAll("$1" + EOS);
+ return s;
+ }
+
+ /**
+ * Repair some positions that don't require a split, i.e. remove the special break character at
+ * those positions. The replacements are applied one after another, each on the
+ * result of the previous one, so their order matters.
+ *
+ * @param s text that already contains break characters
+ * @return the text with the false break characters removed
+ */
+ protected String removeFalseEndOfSentence(String s) {
+ // Don't split at e.g. "U. S. A.":
+ s = abbrev1.matcher(s).replaceAll("$1");
+ // Don't split at e.g. "U.S.A.":
+ s = abbrev2.matcher(s).replaceAll("$1");
+ // Don't split after a white-space followed by a single letter followed
+ // by a dot followed by another whitespace.
+ // e.g. " p. "
+ s = abbrev3.matcher(s).replaceAll("$1");
+ // Don't split at "bla bla... yada yada" (TODO: use \.\.\.\s+ instead?)
+ s = abbrev4.matcher(s).replaceAll("$1$2");
+ // Don't split [.?!] when they're quoted:
+ s = abbrev5.matcher(s).replaceAll("$1");
+
+ // Don't split at abbreviations:
+ for (final Pattern abbrevPattern : abbreviationPatterns) {
+ final Matcher matcher = abbrevPattern.matcher(s);
+ s = matcher.replaceAll("$1");
+ }
+ // Don't break after quote unless there's a capital letter:
+ // e.g.: "That's right!" he said.
+ s = abbrev6.matcher(s).replaceAll("$1$2");
+
+ // fixme? not sure where this should occur, leaving it commented out:
+ // don't break: text . . some more text.
+ // text=~s/(\s\.\s)$EOS(\s*)/$1$2/sg;
+
+ // e.g. "Das ist . so." -> assume one sentence
+ s = abbrev7.matcher(s).replaceAll("$1");
+
+ // e.g. a date like "3.10." -> no sentence boundary after it
+ s = abbrev8.matcher(s).replaceAll("$1");
+
+ // extension by dnaber --commented out, doesn't help:
+ // text = re.compile("(:\s+)%s(\s*[%s])" % (self.EOS, string.lowercase),
+ // re.DOTALL).sub("\\1\\2", text)
+
+ // "13. Dezember" etc. -> no sentence boundary:
+ if (monthNames != null) {
+ for (String element : monthNames) {
+ s = s.replaceAll("(\\d+\\.) " + EOS + "(" + element + ")", "$1 $2");
+ }
+ }
+
+ // e.g. "Das hier ist ein(!) Satz."
+ s = repair10.matcher(s).replaceAll("$1$2$3 ");
+
+ // e.g. "Das hier ist (genau!) ein Satz."
+ s = repair11.matcher(s).replaceAll("$1$2 ");
+
+ // e.g. "bla (...) blubb" -> no end of sentence
+ s = repair12.matcher(s).replaceAll("$1 ");
+
+ return s;
+ }
+
+ /**
+ * Treat some more special cases that make up a sentence boundary. Insert the special break
+ * character at these positions.
+ */
+ private String splitUnsplitStuff(String s) {
+ // e.g. "x5. bla..." -- not sure, leaving commented out:
+ // text = re.compile("(\D\d+)(%s)(\s+)" % self.P, re.DOTALL).sub("\\1\\2%s\\3" % self.EOS, text)
+ // Not sure about this one, leaving out four now:
+ // text = re.compile("(%s\s)(\s*\()" % self.PAP, re.DOTALL).sub("\\1%s\\2" % self.EOS, text)
+ // Split e.g.: He won't. #Really.
+ s = repair1.matcher(s).replaceAll("$1" + EOS + "$2");
+ // Split e.g.: He won't say no. Not really.
+ s = repair2.matcher(s).replaceAll("$1" + EOS + "$2");
+ // Split at "a.m." or "p.m." followed by a capital letter.
+ s = repair3.matcher(s).replaceAll("$1" + EOS + "$2");
+ return s;
+ }
+
+ /*public static void main(final String[] args) {
+ final SentenceTokenizer st = new GermanSentenceTokenizer();
+ st.tokenize("Er sagte (...) und");
+ }*/
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/Tokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/Tokenizer.java
new file mode 100644
index 0000000..9a49fbe
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/Tokenizer.java
@@ -0,0 +1,32 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers;
+
+import java.util.List;
+
+/**
+ * Interface for classes that tokenize text into smaller units.
+ *
+ * @author Daniel Naber
+ */
+public interface Tokenizer {
+
+  /**
+   * Splits the given text into smaller units.
+   *
+   * @param text the text to split
+   * @return the resulting units
+   */
+  List<String> tokenize(String text);
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/WordTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/WordTokenizer.java
new file mode 100644
index 0000000..6764c34
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/WordTokenizer.java
@@ -0,0 +1,59 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+
+/**
+ * Tokenizes a sentence into words.
+ * Punctuation and whitespace gets its own token.
+ *
+ * @author Daniel Naber
+ */
+public class WordTokenizer implements Tokenizer {
+
+  // Every character listed here ends a word and is itself returned as a token.
+  private static final String DELIMITERS =
+      "\u0020\u00A0\u115f\u1160\u1680"
+      + "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+      + "\u2008\u2009\u200A\u200B\u200c\u200d\u200e\u200f"
+      + "\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f"
+      + "\u205F\u2060\u2061\u2062\u2063\u206A\u206b\u206c\u206d"
+      + "\u206E\u206F\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb"
+      + ",.;()[]{}<>!?:/\\\"'«»„”“‘`’…¿¡\t\n\r";
+
+  public WordTokenizer() {
+  }
+
+  /**
+   * Splits a sentence at whitespace and punctuation characters. The separating
+   * characters are kept, each as a one-character token of its own.
+   *
+   * @param text the sentence to split
+   * @return words and separator characters in text order
+   */
+  public List<String> tokenize(final String text) {
+    final List<String> tokens = new ArrayList<String>();
+    final StringTokenizer splitter = new StringTokenizer(text, DELIMITERS, true);
+    while (splitter.hasMoreTokens()) {
+      tokens.add(splitter.nextToken());
+    }
+    return tokens;
+  }
+
+}
+
+
+
+
+
+
+ \ No newline at end of file
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizer.java
new file mode 100644
index 0000000..2f0a4f4
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/cs/CzechSentenceTokenizer.java
@@ -0,0 +1,228 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+/*
+ * CzechSentenceTokenizer.java
+ *
+ * Created on 25.1.2007, 11:45
+ */
+
+package de.danielnaber.languagetool.tokenizers.cs;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ *
+ * @author Jozef Licko
+ */
+public class CzechSentenceTokenizer extends SentenceTokenizer {
+
+ // The four string constants below are private and hide the protected constants
+ // of the same names declared in SentenceTokenizer.
+
+ // End of sentence marker.
+ private static final String EOS = "\0";
+
+ // private final static String EOS = "#"; // for testing only
+
+ // Punctuation.
+ private static final String P = "[\\.!?…]";
+
+ // After punctuation.
+ private static final String AP = "(?:'|«|\"|”|\\)|\\]|\\})?";
+
+ private static final String PAP = P + AP;
+
+ // Check out the private methods for comments and examples about these
+ // regular expressions:
+
+ private static final Pattern paragraphByTwoLineBreaks = Pattern.compile("(\\n\\s*\\n)");
+
+ private static final Pattern paragraphByLineBreak = Pattern.compile("(\\n)");
+
+ // add unbreakable field, for example footnote, if it's at the end of the sentence
+ private static final Pattern punctWhitespace = Pattern.compile("(" + PAP + "(\u0002)?\\s)");
+
+ // \p{Lu} = uppercase, with obeying Unicode (\p{Upper} is just US-ASCII!):
+ private static final Pattern punctUpperLower = Pattern.compile("(" + PAP
+ + ")([\\p{Lu}][^\\p{Lu}.])");
+
+ private static final Pattern letterPunct = Pattern
+ .compile("(\\s[\\wáčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]" + P + ")");
+
+ private static final Pattern abbrev1 = Pattern
+ .compile("([^-\\wáčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ][\\wáčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]" + PAP
+ + "\\s)" + EOS);
+
+ private static final Pattern abbrev2 = Pattern
+ .compile("([^-\\wáčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ][\\wáčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]" + P
+ + ")" + EOS);
+
+ private static final Pattern abbrev3 = Pattern
+ .compile("(\\s[\\wáčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]\\.\\s+)" + EOS);
+
+ private static final Pattern abbrev4 = Pattern.compile("(\\.\\.\\. )" + EOS + "([\\p{Ll}])");
+ private static final Pattern abbrev5 = Pattern.compile("(['\"]" + P + "['\"]\\s+)" + EOS);
+ private static final Pattern abbrev6 = Pattern.compile("([\"']\\s*)" + EOS + "(\\s*[\\p{Ll}])");
+ private static final Pattern abbrev7 = Pattern.compile("(\\s" + PAP + "\\s)" + EOS);
+ // e.g. 3.10. (in a date):
+ private static final Pattern abbrev8 = Pattern.compile("(\\d{1,2}\\.\\d{1,2}\\.\\s+)" + EOS);
+ private static final Pattern repair1 = Pattern.compile("('[\\wáčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]"
+ + P + ")(\\s)");
+ private static final Pattern repair2 = Pattern.compile("(\\sno\\.)(\\s+)(?!\\d)");
+
+ // Czech abbreviations (ver. 0.2)
+
+ // various titles
+ private static final String TITLES = "Bc|BcA|Ing|Ing.arch|MUDr|MVDr|MgA|Mgr|JUDr|PhDr|" +
+ "RNDr|PharmDr|ThLic|ThDr|Ph.D|Th.D|prof|doc|CSc|DrSc|dr. h. c|PaedDr|Dr|PhMr|DiS";
+
+ // as a single regexp:
+ // (the entries are regex alternatives, so a "." inside one matches any character)
+ private static final String ABBREVIATIONS = "abt|ad|a.i|aj|angl|anon|apod|atd|atp|aut|bd|biogr|" +
+ "b.m|b.p|b.r|cca|cit|cizojaz|c.k|col|čes|čín|čj|ed|facs|fasc|fol|fot|franc|h.c|hist|hl|" +
+ "hrsg|ibid|il|ind|inv.č|jap|jhdt|jv|koed|kol|korej|kl|krit|lat|lit|m.a|maď|mj|mp|násl|" +
+ "např|nepubl|něm|no|nr|n.s|okr|odd|odp|obr|opr|orig|phil|pl|pokrač|pol|port|pozn|př.kr|" +
+ "př.n.l|přel|přeprac|příl|pseud|pt|red|repr|resp|revid|rkp|roč|roz|rozš|samost|sect|" +
+ "sest|seš|sign|sl|srv|stol|sv|šk|šk.ro|špan|tab|t.č|tis|tj|tř|tzv|univ|uspoř|vol|" +
+ "vl.jm|vs|vyd|vyobr|zal|zejm|zkr|zprac|zvl|n.p"
+ + "|" + TITLES;
+
+ // Paragraph pattern currently in effect; chosen in setSingleLineBreaksMarksParagraph().
+ private Pattern paragraph;
+
+ /**
+ * Create a sentence tokenizer. Paragraphs initially end only at two or
+ * more consecutive line breaks.
+ */
+ public CzechSentenceTokenizer() {
+ setSingleLineBreaksMarksParagraph(false);
+ }
+
+ /**
+ * @param lineBreakParagraphs if <code>true</code>, single lines breaks are assumed to end a paragraph,
+ * with <code>false</code>, only two ore more consecutive line breaks end a paragraph
+ */
+ public final void setSingleLineBreaksMarksParagraph(final boolean lineBreakParagraphs) {
+ if (lineBreakParagraphs)
+ paragraph = paragraphByLineBreak;
+ else
+ paragraph = paragraphByTwoLineBreaks;
+ }
+
+ /**
+  * Splits the given string into sentences. Break markers are inserted,
+  * false ones removed, missing ones added, and the result is cut at the
+  * remaining markers. Empty pieces between adjacent markers are not returned.
+  *
+  * @param s the text to split
+  * @return the sentences in text order
+  */
+ public final List<String> tokenize(String s) {
+   final String marked = splitUnsplitStuff(removeFalseEndOfSentence(firstSentenceSplitting(s)));
+   final List<String> sentences = new ArrayList<String>();
+   for (final StringTokenizer parts = new StringTokenizer(marked, EOS); parts.hasMoreTokens();) {
+     sentences.add(parts.nextToken());
+   }
+   return sentences;
+ }
+
+ /**
+ * Add a special break character at all places with typical sentence delimiters.
+ */
+ private String firstSentenceSplitting(String s) {
+ // Double new-line means a new sentence:
+ s = paragraph.matcher(s).replaceAll("$1" + EOS);
+ // Punctuation followed by whitespace means a new sentence:
+ s = punctWhitespace.matcher(s).replaceAll("$1" + EOS);
+ // New (compared to the perl module): Punctuation followed by uppercase followed
+ // by non-uppercase character (except dot) means a new sentence:
+ s = punctUpperLower.matcher(s).replaceAll("$1" + EOS + "$2");
+ // Break also when single letter comes before punctuation:
+ s = letterPunct.matcher(s).replaceAll("$1" + EOS);
+ return s;
+ }
+
+ /**
+ * Repair some positions that don't require a split, i.e. remove the special break character at
+ * those positions.
+ */
+ protected String removeFalseEndOfSentence(String s) {
+ // Don't split at e.g. "U. S. A.":
+ s = abbrev1.matcher(s).replaceAll("$1");
+ // Don't split at e.g. "U.S.A.":
+ s = abbrev2.matcher(s).replaceAll("$1");
+ // Don't split after a white-space followed by a single letter followed
+ // by a dot followed by another whitespace.
+ // e.g. " p. "
+ s = abbrev3.matcher(s).replaceAll("$1");
+ // Don't split at "bla bla... yada yada" (TODO: use \.\.\.\s+ instead?)
+ s = abbrev4.matcher(s).replaceAll("$1$2");
+ // Don't split [.?!] when the're quoted:
+ s = abbrev5.matcher(s).replaceAll("$1");
+
+ // Don't split at abbreviations, treat them case insensitive
+ //TODO: don't split at some abbreviations followed by uppercase
+ //E.g., "Wojna rozpoczęła się w 1918 r. To była krwawa jatka"
+ //should be split at "r."... But
+ //"Ks. Jankowski jest analfabetą" shouldn't be split...
+ //this requires a special list of abbrevs used before names etc.
+
+ //removing the loop and using only one regexp - this is definitely much, much faster
+ Pattern pattern = Pattern.compile("(?u)(\\b(" + ABBREVIATIONS + ")" + PAP + "\\s)" + EOS);
+ s = pattern.matcher(s).replaceAll("$1");
+
+ // Don't break after quote unless there's a capital letter:
+ // e.g.: "That's right!" he said.
+ s = abbrev6.matcher(s).replaceAll("$1$2");
+
+ // fixme? not sure where this should occur, leaving it commented out:
+ // don't break: text . . some more text.
+ // text=~s/(\s\.\s)$EOS(\s*)/$1$2/sg;
+
+ // e.g. "Das ist . so." -> assume one sentence
+ s = abbrev7.matcher(s).replaceAll("$1");
+
+ // e.g. "Das ist . so." -> assume one sentence
+ s = abbrev8.matcher(s).replaceAll("$1");
+
+ // extension by dnaber --commented out, doesn't help:
+ // text = re.compile("(:\s+)%s(\s*[%s])" % (self.EOS, string.lowercase),
+ // re.DOTALL).sub("\\1\\2", text)
+
+ s = s.replaceAll("(\\d+\\.) " + EOS + "([\\p{L}&&[^\\p{Lu}]]+)", "$1 $2");
+
+ // z.B. "Das hier ist ein(!) Satz."
+ s = s.replaceAll("\\(([!?]+)\\) " + EOS, "($1) ");
+ return s;
+ }
+
+ /**
+ * Treat some more special cases that make up a sentence boundary. Insert the special break
+ * character at these positions.
+ */
+ private String splitUnsplitStuff(String s) {
+ // e.g. "x5. bla..." -- not sure, leaving commented out:
+ // text = re.compile("(\D\d+)(%s)(\s+)" % self.P, re.DOTALL).sub("\\1\\2%s\\3" % self.EOS, text)
+ // Not sure about this one, leaving out four now:
+ // text = re.compile("(%s\s)(\s*\()" % self.PAP, re.DOTALL).sub("\\1%s\\2" % self.EOS, text)
+ // Split e.g.: He won't. #Really.
+ s = repair1.matcher(s).replaceAll("$1" + EOS + "$2");
+ // Split e.g.: He won't say no. Not really.
+ s = repair2.matcher(s).replaceAll("$1" + EOS + "$2");
+ return s;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/da/DanishSentenceTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/da/DanishSentenceTokenizer.java
new file mode 100644
index 0000000..32db6d5
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/da/DanishSentenceTokenizer.java
@@ -0,0 +1,43 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.da;
+
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+
+/**
+ * @deprecated use {@code new SRXSentenceTokenizer("da")} instead
+ * @author Daniel Naber
+ */
+public class DanishSentenceTokenizer extends SentenceTokenizer {
+
+ // Danish abbreviations, listed without their trailing period. The list is handed
+ // unchanged to the SentenceTokenizer constructor below.
+ private static final String[] ABBREV_LIST = {
+"abs", "abstr", "adj", "adm", "adr", "adv", "afd", "afg", "afl", "afs", "afvig", "agro", "akad", "akk", "allr", "alm", "amer", "anat", "ang", "anm", "anv", "apot", "appos", "apr", "arab", "arkais", "arkæol", "arp", "arr", "art", "ass", "astr", "att", "attrib", "aud", "aug", "aut", "bag", "barb", "barnespr", "bd", "bdt", "beg", "besl", "best", "bet", "bhk", "biavl", "bibet", "bibl", "bibliot", "billard", "billedl", "biol", "bjergv", "bk", "bl", "bogb", "bogh", "bogtr", "bornh", "bot", "br", "bryg", "bto", "bygn", "bødk", "ca", "cand", "Chr", "cirk", "cit", "co", "d", "da", "dagl", "dans", "dat", "dec", "def", "demonstr", "dep", "dial", "diam", "dim", "disp", "distr", "distrib", "dobb", "dr", "dvs", "e", "egl", "eks", "eksam", "ekskl", "eksp", "ekspl", "el", "ell", "ellipt", "emb", "endv", "eng", "enk", "ent", "etc", "etnogr", "eufem", "eur", "event", "evt", "f", "fagl", "fakt", "farv", "feb", "ff", "fhv", "fig", "filos", "fisk", "fk", "fl", "flg", "flt", "flyv", "fmd", "fon", "foragt", "forb", "foreg", "forf", "forsikr", "forsk", "forst", "foræld", "fot", "fr", "fre", "fris", "frk", "fsv", "fuldm", "fx", "fys", "fysiol", "fægt", "gart", "gartn", "garv", "gdr", "gen", "genopt", "geogr", "geol", "geom", "germ", "gl", "glarm", "glda", "gldgs", "glholl", "glno", "gns", "got", "gr", "gradbøjn", "gram", "gross", "grundbet", "græc", "guldsm", "gym", "h", "hat", "hd", "hebr", "henh", "hensobj", "herald", "hhv", "hist", "hj", "holl", "hovedbet", "hr", "hty", "højtid", "haandarb", "haandv", "i", "if", "ifm", "ift", "iht", "imp", "indb", "indik", "inf", "ing", "Inkl", "inkl", "insp", "instr", "interj", "intk", "intr", "iron", "isl", "ital", "jan", "jarg", "jf", "jnr", "jr", "jul", "jun", "jur", "jy", "jæg", "jærnb", "jød", "Kbh", "kbh", "kem", "kgl", "kirk", "kl", "kld", "knsp", "kog", "koll", "komm", "komp", "konj", "konkr", "kons", "Kr", "kr", "kurv", "kvt", "køkkenspr", "l", "landbr", "landmaaling", "lat", "lb", "lic", "lign", "litt", "Ll", "log", "Loll", "loll", "lrs", 
"lør", "m", "maj", "maks", "mal", "man", "mar", "mat", "mdl", "mdr", "med", "medl", "meng", "merc", "meteorol", "meton", "metr", "mf", "mfl", "mht", "mia", "min", "mineral", "mio", "ml", "mlat", "mm", "mnt", "mods", "modsætn", "modt", "mr", "mrk", "mur", "mv", "mvh", "mytol", "møl", "mønt", "n", "naturv", "ndf", "Ndr", "nedsæt", "nht", "no", "nom", "nov", "nr", "nt", "num", "nyda", "nydann", "nylat", "naal", "obj", "obl", "oblik", "obs", "odont", "oecon", "oeng", "ofl", "ogs", "oht", "okt", "oldfr", "oldfris", "oldn", "olgn", "omg", "omkr", "omtr", "ons", "opr", "ordspr", "org", "osax", "osv", "ovenst", "overf", "overs", "ovf", "p", "pag", "part", "pass", "pct", "perf", "pga", "ph", "pharm", "phil", "pk", "pkt", "pl", "plur", "poet", "polit", "port", "poss", "post", "pott", "pr", "pron", "propr", "prov", "præd", "præp", "præs", "præt", "psych", "pt", "pæd", "paavirkn", "reb", "ref", "refl", "regn", "relat", "relig", "resp", "retor", "rid", "rigsspr", "run", "russ", "s", "sa", "sanskr", "scient", "sdjy", "sdr", "sek", "sen", "sep", "sept", "shetl", "sj", "sjæll", "skibsbygn", "sko", "skol", "skr", "skriftspr", "skræd", "Skt", "slagt", "slutn", "smed", "sml", "smsat", "smst", "snedk", "soldat", "sp", "spec", "sport", "spot", "spr", "sprogv", "spøg", "ssg", "ssgr", "st", "stk", "str", "stud", "subj", "subst", "superl", "sv", "sætn", "søn", "talem", "talespr", "tandl", "td", "tdl", "teat", "techn", "telef", "telegr", "teol", "th", "theol", "tir", "tirs", "tlf", "told", "tor", "tors", "trans", "tsk", "ty", "tyrk", "tøm", "u", "ubesl", "ubest", "udd", "udenl", "udg", "udtr", "uegl", "ugtl", "ult", "underbet", "undt", "univ", "upers", "ur", "urnord", "v", "var", "vbs", "vedk", "vedl", "vedr", "vejl", "verb", "vet", "vha", "vol", "vs", "vsa", "vulg", "væv", "zool", "æ", "æda", "ænht", "ænyd", "æstet", "ø", "å", "årg", "årh"
+ };
+
+ // Month names like "januar" that should not be considered a sentence
+ // boundary in a string like "13. januar".
+ private static final String[] MONTH_NAMES = { "januar", "februar", "marts", "april", "maj",
+ "juni", "juli", "august", "september", "oktober", "november", "december" };
+
+ /**
+  * Creates a tokenizer that uses the built-in Danish abbreviation list and
+  * installs the Danish month names on the inherited <code>monthNames</code> field.
+  */
+ public DanishSentenceTokenizer() {
+ super(ABBREV_LIST);
+ // NOTE(review): how SentenceTokenizer consumes monthNames is not visible here;
+ // presumably it suppresses breaks after "<number>." -- confirm in the superclass.
+ super.monthNames = MONTH_NAMES;
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanCompoundTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanCompoundTokenizer.java
new file mode 100644
index 0000000..fb141a0
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanCompoundTokenizer.java
@@ -0,0 +1,47 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.de;
+
+import java.io.IOException;
+import java.util.List;
+
+import de.abelssoft.wordtools.jWordSplitter.impl.GermanWordSplitter;
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+
+/**
+ * Split German nouns using the jWordSplitter library.
+ *
+ * @author Daniel Naber
+ */
+public class GermanCompoundTokenizer implements Tokenizer {
+
+  /** Fully configured splitter; assigned once in the constructor. */
+  private final GermanWordSplitter wordSplitter;
+
+  /**
+   * Creates the tokenizer and configures the underlying jWordSplitter instance.
+   *
+   * @throws IOException if the word splitter cannot be created
+   */
+  public GermanCompoundTokenizer() throws IOException {
+    final GermanWordSplitter splitter = new GermanWordSplitter(false);
+    // strict mode is required for now to make the minimum length work
+    splitter.setStrictMode(true);
+    splitter.setMinimumWordLength(3);
+    wordSplitter = splitter;
+  }
+
+  /**
+   * Splits a single word into its compound parts.
+   *
+   * @param word the word to split
+   * @return the parts as returned by the word splitter
+   */
+  public List<String> tokenize(String word) {
+    final List<String> parts = (List<String>) wordSplitter.splitWord(word);
+    return parts;
+  }
+
+}
+
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizer.java
new file mode 100644
index 0000000..31dab43
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/de/GermanSentenceTokenizer.java
@@ -0,0 +1,95 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.de;
+
+import java.util.regex.Pattern;
+
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import org.apache.commons.lang.ArrayUtils;
+
+/**
+ * @deprecated use {@code new SRXSentenceTokenizer("de")} instead
+ */
+public class GermanSentenceTokenizer extends SentenceTokenizer {
+
+  /** Built-in German abbreviations, listed without their trailing period. */
+  private static final String[] ABBREV_LIST = {
+    "d", "Übers", "usw", "bzw", "Abh", "Abk", "Abt", "ahd", "Akk",
+    "allg", "alltagsspr", "altdt", "alttest", "amerikan", "Anh",
+    "Ank", "Anm", "Art", "Az", "Bat", "bayr", "Bd", "Bde", "Bed",
+    "Bem", "bes", "bez", "Bez", "Bhf", "bspw", "btto", "bw", "bzw",
+    "cts", "ct", "Ca", "ca", "chem", "chin", "Chr", "cresc", "dat", "Dat",
+    "desgl", "ders", "dgl", "Di", "Dipl", "Dir", "Do", "Doz", "Dr",
+    "dt", "ebd", "Ed", "eigtl", "engl", "Erg", "al", "etc", "etw",
+    "ev", "evtl", "Evtl", "exkl", "Expl", "Exz", "ff", "Fa", "fachspr", "fam",
+    "fem", "Fem", "Fr", "fr", "franz", "frz", "frdl", "Frl",
+    "Fut", "Gd", "geb", "gebr", "Gebr", "geh", "geh", "geleg", "gen",
+    "Gen", "germ", "gesch", "ges", "get", "ggf", "Ggs", "ggT",
+    "griech", "hebr", "hg", "Hrsg", "Hg", "hist", "hochd", "hochspr",
+    "Hptst", "Hr", "Allg", "ill", "inkl", "incl", "Ind", "Inf", "Ing",
+    "ital", "Tr", "Jb", "Jg", "Jh", "Jhd", "jmd", "jmdm", "jmdn", "jmds",
+    "jur", "Kap", "kart", "kath", "kfm", "kaufm", "Kfm", "kgl",
+    "Kl", "Konj", "Krs", "Kr", "Kto", "lat", "lfd", "Lit", "lt",
+    "Lz", "Mask", "mask", "Mrd", "mdal", "med", "met", "mhd", "Mi",
+    "Mio", "min", "Mo", "mod", "nachm", "nördlBr", "neutr",
+    "Nhd", "Nom", "Nr", "Nrn", "Num", "Obj", "od", "dgl", "offz",
+    "Part", "Perf", "Pers", "Pfd", "Pl", "Plur",
+    "pl", "Plusq", "Pos", "pp", "Präp", "Präs", "Prät", "Prov", "Prof",
+    "rd", "reg", "resp", "Rhld", "rit", "Sa", "südl", "Br",
+    "sel", "sen", "Sept", "Sing", "sign", "So", "sog", "Sp", "St",
+    "St", "St", "Std", "stacc", "Str", "stud", "Subst", "sva", "svw",
+    "sZ", "Tel", "Temp", "trans", "Tsd", "übertr", "übl", "ff", "ugs", "univ",
+    "urspr", "usw", "vgl", "Vol", "vorm", "vorm", "Vp", "Vs",
+    "vs", "wg", "Hd", "Ztr", "zus", "Zus", "zzt", "zz", "Zz", "Zt",
+    "Min", "bzgl"};
+
+  // German month names that may follow a number without a sentence boundary
+  // being detected (e.g. "am 13. Dezember" -> no sentence boundary)
+  private static final String[] MONTH_NAMES = { "Januar", "Februar", "März", "April", "Mai",
+    "Juni", "Juli", "August", "September", "Oktober", "November", "Dezember",
+    // similar cases that are not month names:
+    "Jh", "Jhd", "Jahrhundert", "Jahrhunderts", "Geburtstag", "Geburtstags", "Platz", "Platzes"};
+
+  /** don't split at cases like "Friedrich II. wird auch..." */
+  private static final Pattern REPAIR_NAME_PATTERN = Pattern.compile("( [IVX]+\\.) " + EOS + "([^A-ZÖÄÜ]+)");
+
+  /** don't split at cases like "im 13. oder 14. Jahrhundert" */
+  private static final Pattern REPAIR_NUMBER_PATTERN = Pattern.compile("(\\d+\\.)(\\s+)" + EOS + "(und|oder|bis) ");
+
+  /** Creates a sentence tokenizer that knows only the built-in abbreviations. */
+  public GermanSentenceTokenizer() {
+    super(ABBREV_LIST);
+    this.monthNames = MONTH_NAMES;
+  }
+
+  /**
+   * Create a sentence tokenizer with the given list of abbreviations,
+   * additionally to the built-in ones.
+   */
+  public GermanSentenceTokenizer(final String[] abbrevList) {
+    super(withBuiltInAbbreviations(abbrevList));
+    this.monthNames = MONTH_NAMES;
+  }
+
+  /** Returns the built-in abbreviations followed by the caller-supplied ones. */
+  private static String[] withBuiltInAbbreviations(final String[] extraAbbreviations) {
+    return (String[]) ArrayUtils.addAll(ABBREV_LIST, extraAbbreviations);
+  }
+
+  /**
+   * Runs the inherited clean-up first, then removes the break marker after
+   * Roman-numeral ordinals and after ordinals followed by "und", "oder" or "bis".
+   */
+  protected String removeFalseEndOfSentence(String s) {
+    final String inheritedResult = super.removeFalseEndOfSentence(s);
+    final String namesRepaired = REPAIR_NAME_PATTERN.matcher(inheritedResult).replaceAll("$1 $2");
+    return REPAIR_NUMBER_PATTERN.matcher(namesRepaired).replaceAll("$1$2$3 ");
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/en/EnglishWordTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/en/EnglishWordTokenizer.java
new file mode 100644
index 0000000..5b29d18
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/en/EnglishWordTokenizer.java
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.en;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+
+/**
+ * Tokenizes a sentence into words. Punctuation and whitespace gets its own token.
+ *
+ * @author Daniel Naber
+ */
+public class EnglishWordTokenizer implements Tokenizer {
+
+  /**
+   * Characters that end the current token. Because delimiters are returned too,
+   * each of them also becomes a one-character token of its own.
+   */
+  private static final String DELIMITERS =
+      "\u0020\u00A0\u115f\u1160\u1680"
+      + "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+      + "\u2008\u2009\u200A\u200B\u200c\u200d\u200e\u200f"
+      + "\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f"
+      + "\u205F\u2060\u2061\u2062\u2063\u206A\u206b\u206c\u206d"
+      + "\u206E\u206F\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb"
+      + ",.;()[]{}!?:\"'’‘„“”…\\/\t\n";
+
+  public EnglishWordTokenizer() {
+  }
+
+  /**
+   * Splits the text at whitespace and punctuation.
+   *
+   * @param text the text to tokenize
+   * @return words, whitespace and punctuation in their original order
+   */
+  public List<String> tokenize(final String text) {
+    final StringTokenizer splitter = new StringTokenizer(text, DELIMITERS, true);
+    final List<String> result = new ArrayList<String>();
+    while (splitter.hasMoreTokens()) {
+      result.add(splitter.nextToken());
+    }
+    return result;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/gl/GalicianWordTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/gl/GalicianWordTokenizer.java
new file mode 100644
index 0000000..6a1919e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/gl/GalicianWordTokenizer.java
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.gl;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+
+/**
+ * Tokenizes a sentence into words. Punctuation and whitespace gets its own token.
+ *
+ * @author Daniel Naber
+ */
+public class GalicianWordTokenizer implements Tokenizer {
+
+  /**
+   * Characters that end the current token; this set also contains the inverted
+   * marks "¿" and "¡". Because delimiters are returned too, each of them also
+   * becomes a one-character token of its own.
+   */
+  private static final String DELIMITERS =
+      "\u0020\u00A0\u115f\u1160\u1680"
+      + "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+      + "\u2008\u2009\u200A\u200B\u200c\u200d\u200e\u200f"
+      + "\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f"
+      + "\u205F\u2060\u2061\u2062\u2063\u206A\u206b\u206c\u206d"
+      + "\u206E\u206F\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb"
+      + ",.;()[]{}¿¡!?:\"'’‘„“”…\\/\t\n";
+
+  public GalicianWordTokenizer() {
+  }
+
+  /**
+   * Splits the text at whitespace and punctuation.
+   *
+   * @param text the text to tokenize
+   * @return words, whitespace and punctuation in their original order
+   */
+  public List<String> tokenize(final String text) {
+    final StringTokenizer splitter = new StringTokenizer(text, DELIMITERS, true);
+    final List<String> result = new ArrayList<String>();
+    while (splitter.hasMoreTokens()) {
+      result.add(splitter.nextToken());
+    }
+    return result;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ml/MalayalamWordTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ml/MalayalamWordTokenizer.java
new file mode 100644
index 0000000..ebd7ce3
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ml/MalayalamWordTokenizer.java
@@ -0,0 +1,55 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.ml;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+
+/**
+ * Tokenizes a sentence into words. Punctuation and whitespace gets its own token.
+ *
+ * @author Daniel Naber
+ */
+public class MalayalamWordTokenizer implements Tokenizer {
+
+  /**
+   * Characters that end the current token. Because delimiters are returned too,
+   * each of them also becomes a one-character token of its own.
+   *
+   * Unlike the other word tokenizers in this package, the following code points
+   * are deliberately NOT delimiters here: U+2000-U+200F, U+2028-U+202F, U+205F-U+2063,
+   * U+206A-U+206F, U+3000, U+3164, U+FEFF, U+FFA0 and U+FFF9-U+FFFB.
+   * NOTE(review): presumably because the zero-width joiner and non-joiner
+   * (U+200D, U+200C) occur inside Malayalam words -- confirm.
+   */
+  private static final String DELIMITERS =
+      "\u0020\u00A0\u115f\u1160\u1680"
+      + ",.;()[]{}!?:\"'’‘„“”…\\/\t\n";
+
+  public MalayalamWordTokenizer() {
+  }
+
+  /**
+   * Splits the text at whitespace and punctuation.
+   *
+   * @param text the text to tokenize
+   * @return words, whitespace and punctuation in their original order
+   */
+  public List<String> tokenize(final String text) {
+    final StringTokenizer splitter = new StringTokenizer(text, DELIMITERS, true);
+    final List<String> result = new ArrayList<String>();
+    while (splitter.hasMoreTokens()) {
+      result.add(splitter.nextToken());
+    }
+    return result;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizer.java
new file mode 100644
index 0000000..7b12536
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/nl/DutchWordTokenizer.java
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2008 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tokenizers.nl;
+
+import java.util.Arrays;
+import java.util.List;
+
+import de.danielnaber.languagetool.tokenizers.WordTokenizer;
+
+public class DutchWordTokenizer extends WordTokenizer {
+
+  /** Stand-in that hides word-internal apostrophes from the generic tokenizer. */
+  private static final String APOSTROPHE_PLACEHOLDER = "##NL_APOS##";
+
+  public DutchWordTokenizer() {
+  }
+
+  /**
+   * Tokenizes just like WordTokenizer with the exception for words such as
+   * "oma's" that contain an apostrophe in their middle: an apostrophe that has a
+   * letter on both sides stays part of the word.
+   *
+   * @param text
+   *          - Text to tokenize
+   * @return List of tokens (fixed-size list).
+   *
+   *         Note: the special string ##NL_APOS## is used to replace the apostrophe
+   *         during tokenizing, so input that already contains that literal string
+   *         will come back with an apostrophe in its place.
+   */
+  public List<String> tokenize(final String text) {
+    // TODO: find a cleaner implementation, this is a hack
+    // Lookbehind/lookahead check the neighbouring letters without consuming them.
+    // The previous pattern "([\\p{L}])'([\\p{L}])" consumed both letters, so in an
+    // overlapping run such as "a'b'c" the second apostrophe was never protected.
+    final String masked = text.replaceAll("(?<=\\p{L})'(?=\\p{L})", APOSTROPHE_PLACEHOLDER);
+    final List<String> tokenList = super.tokenize(masked);
+    final String[] tokens = new String[tokenList.size()];
+    int i = 0;
+    for (final String token : tokenList) {
+      tokens[i++] = token.replace(APOSTROPHE_PLACEHOLDER, "'");
+    }
+    return Arrays.asList(tokens);
+  }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizer.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizer.java
new file mode 100644
index 0000000..42fa76e
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tokenizers/ro/RomanianWordTokenizer.java
@@ -0,0 +1,56 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tokenizers.ro;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import de.danielnaber.languagetool.tokenizers.Tokenizer;
+
+/**
+ * Tokenizes a sentence into words. Punctuation and whitespace gets its own
+ * token. Like EnglishWordTokenizer except for some characters: eg: "-'
+ *
+ * @author Ionuț Păduraru
+ * @since 20.02.2009 19:53:50
+ */
+public class RomanianWordTokenizer implements Tokenizer {
+
+  /**
+   * Characters that end the current token. Compared with the English tokenizer
+   * this set additionally contains carriage return, guillemets, angle brackets,
+   * percent and degree signs, hyphen, pipe and equals sign. Because delimiters
+   * are returned too, each of them also becomes a one-character token of its own.
+   */
+  private static final String DELIMITERS =
+      "\u0020\u00A0\u115f\u1160\u1680"
+      + "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
+      + "\u2008\u2009\u200A\u200B\u200c\u200d\u200e\u200f"
+      + "\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f"
+      + "\u205F\u2060\u2061\u2062\u2063\u206A\u206b\u206c\u206d"
+      + "\u206E\u206F\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb"
+      + ",.;()[]{}!?:\"'’‘„“”…\\/\t\n\r«»<>%°" + "-|=";
+
+  public RomanianWordTokenizer() {
+  }
+
+  /**
+   * Splits the text at whitespace and punctuation.
+   *
+   * @param text the text to tokenize
+   * @return words, whitespace and punctuation in their original order
+   */
+  public List<String> tokenize(final String text) {
+    final StringTokenizer splitter = new StringTokenizer(text, DELIMITERS, true);
+    final List<String> result = new ArrayList<String>();
+    while (splitter.hasMoreTokens()) {
+      result.add(splitter.nextToken());
+    }
+    return result;
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore
new file mode 100644
index 0000000..b71c741
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore
@@ -0,0 +1 @@
+EnglishTaggerExtract.java
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java
new file mode 100644
index 0000000..9735cac
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java
@@ -0,0 +1,232 @@
+/* ReflectionUtils, helper methods to load classes dynamically
+ * Copyright (C) 2007 Andriy Rysin, Marcin Milkowski, Daniel Naber
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Modifier;
+import java.net.JarURLConnection;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.*;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+public final class ReflectionUtils {
+
+  /** File-name suffix of compiled classes, in directories as well as in jars. */
+  private static final String CLASS_SUFFIX = ".class";
+
+  private ReflectionUtils() {
+    // static utility class, never instantiated
+  }
+
+  /**
+   * Finds the concrete (non-abstract, non-interface) classes of a package, looking
+   * into every directory and jar file the class loader reports for that package.
+   *
+   * @param classLoader
+   *          Classloader used to locate the package resources
+   * @param packageName
+   *          Package name to check classes in
+   * @param classNameRegEx
+   *          If not null limit simple class names to this regexp. This parameter
+   *          is checked before the class is loaded so use it to improve
+   *          performance by skipping loading extra classes
+   * @param subdirLevel
+   *          If more than 0 all subdirectories/subpackages up to
+   *          <code>subdirLevel</code> levels deep will be traversed. This
+   *          parameter is checked before the class is loaded - use it to improve
+   *          performance by skipping loading extra classes
+   * @param classExtends
+   *          If not null return only classes which extend this class
+   * @param interfaceImplements
+   *          If not null return only classes which directly implement this
+   *          interface
+   * @return Returns all classes inside the given package that pass every filter
+   * @throws ClassNotFoundException
+   *           if anything goes wrong while scanning or loading; the original
+   *           exception is attached as the cause
+   */
+  public static Class[] findClasses(final ClassLoader classLoader,
+      final String packageName, final String classNameRegEx,
+      final int subdirLevel, final Class classExtends,
+      final Class interfaceImplements) throws ClassNotFoundException {
+    final Map<Class,String> foundClasses = new HashMap<Class,String>();
+
+    try {
+      final String packagePath = packageName.replace('.', '/');
+      final Enumeration<URL> resources = classLoader.getResources(packagePath);
+
+      // the same location may be reported more than once, visit each only once
+      final Set<URI> uniqResources = new HashSet<URI>();
+      while (resources.hasMoreElements()) {
+        uniqResources.add(resources.nextElement().toURI());
+      }
+
+      for (final URI res : uniqResources) {
+        final URL resource = res.toURL();
+        // jars and directories are treated differently
+        if (resource.getProtocol().startsWith("jar")) {
+          findClassesInJar(packageName, classNameRegEx, subdirLevel,
+              classExtends, interfaceImplements, foundClasses, resource);
+        } else {
+          findClassesInDirectory(classLoader, packageName, classNameRegEx,
+              subdirLevel, classExtends, interfaceImplements, foundClasses,
+              resource);
+        }
+      }
+    } catch (final Exception ex) {
+      throw new ClassNotFoundException("Loading rules failed: "
+          + ex.getMessage(), ex);
+    }
+
+    return foundClasses.keySet().toArray(new Class[foundClasses.size()]);
+  }
+
+  /**
+   * Collects matching classes from a package directory and, while
+   * <code>subdirLevel</code> is positive, from its sub-packages.
+   */
+  private static void findClassesInDirectory(final ClassLoader classLoader,
+      final String packageName, final String classNameRegEx,
+      final int subdirLevel, final Class classExtends,
+      final Class interfaceImplements, final Map<Class,String> foundClasses,
+      final URL resource) throws Exception {
+    final File directory = new File(resource.toURI());
+
+    // listFiles() returns null for anything that is not a readable directory. The
+    // former check "!exists() && !isDirectory()" let plain files through and then
+    // failed with a NullPointerException.
+    final File[] entries = directory.isDirectory() ? directory.listFiles() : null;
+    if (entries == null) {
+      throw new IOException("directory does not exist: "
+          + directory.getAbsolutePath());
+    }
+
+    // read classes
+    for (final File file : entries) {
+      final String fileName = file.getName();
+      if (!file.isFile() || !fileName.endsWith(CLASS_SUFFIX)) {
+        continue;
+      }
+      final String classShortNm = fileName.substring(0, fileName.lastIndexOf('.'));
+      if (classNameRegEx != null && !classShortNm.matches(classNameRegEx)) {
+        continue;
+      }
+      final Class clazz = Class.forName(packageName + "." + classShortNm);
+      if (isMaterial(clazz)
+          && matchesFilters(clazz, classExtends, interfaceImplements)) {
+        foundClasses.put(clazz, file.getAbsolutePath());
+      }
+    }
+
+    // then subdirectories if we're traversing
+    if (subdirLevel > 0) {
+      for (final File dir : entries) {
+        if (!dir.isDirectory()) {
+          continue;
+        }
+        final Class[] subLevelClasses = findClasses(classLoader, packageName
+            + "." + dir.getName(), classNameRegEx, subdirLevel - 1,
+            classExtends, interfaceImplements);
+        for (final Class tmpClass : subLevelClasses) {
+          foundClasses.put(tmpClass, "dir:" + dir.getAbsolutePath());
+        }
+      }
+    }
+  }
+
+  /**
+   * Collects matching classes from the jar file behind <code>resource</code>.
+   * Jars are flat containers, so every entry is inspected and the sub-package
+   * depth is derived from the entry name.
+   */
+  private static void findClassesInJar(final String packageName,
+      final String classNameRegEx, final int subdirLevel,
+      final Class classExtends, final Class interfaceImplements,
+      final Map<Class,String> foundClasses, final URL resource) throws IOException,
+      URISyntaxException, ClassNotFoundException {
+    final JarURLConnection conn = (JarURLConnection) resource.openConnection();
+    final JarFile currentFile = conn.getJarFile();
+    // Include the dot so that package "a.b" does not also match "a.bc"; this also
+    // guarantees that the substring() below stays in bounds.
+    final String packagePrefix = packageName + ".";
+
+    for (final Enumeration<JarEntry> e = currentFile.entries(); e
+        .hasMoreElements();) {
+      final String name = e.nextElement().getName();
+      if (!name.endsWith(CLASS_SUFFIX)) {
+        continue;
+      }
+
+      // Strip only the trailing ".class"; replacing every occurrence would corrupt
+      // names such as "org/classes/Foo.class".
+      final String classNm = name.substring(0,
+          name.length() - CLASS_SUFFIX.length()).replace('/', '.');
+      if (!classNm.startsWith(packagePrefix)) {
+        continue;
+      }
+      final String classShortNm = classNm.substring(classNm.lastIndexOf('.') + 1);
+      if (classNameRegEx != null && !classShortNm.matches(classNameRegEx)) {
+        continue;
+      }
+
+      // every dot in the remainder is one sub-package level below packageName
+      final String subName = classNm.substring(packagePrefix.length());
+      if (countOccurrences(subName, '.') > subdirLevel) {
+        continue;
+      }
+
+      final Class clazz = Class.forName(classNm);
+      if (foundClasses.containsKey(clazz)) {
+        throw new RuntimeException("Duplicate class definition:\n"
+            + clazz.getName() + ", found in\n" + currentFile.getName() + " and\n"
+            + foundClasses.get(clazz));
+      }
+
+      if (isMaterial(clazz)
+          && matchesFilters(clazz, classExtends, interfaceImplements)) {
+        foundClasses.put(clazz, currentFile.getName());
+      }
+    }
+  }
+
+  /**
+   * Applies both optional filters; a null filter accepts every class. The explicit
+   * grouping matters: written as <code>a || b &amp;&amp; c || d</code> the
+   * condition let every class pass whenever <code>classExtends</code> was null and
+   * ignored <code>classExtends</code> whenever both filters were given.
+   */
+  private static boolean matchesFilters(final Class clazz,
+      final Class classExtends, final Class interfaceImplements) {
+    final boolean extendsOk = classExtends == null
+        || isExtending(clazz, classExtends.getName());
+    final boolean implementsOk = interfaceImplements == null
+        || isImplementing(clazz, interfaceImplements);
+    return extendsOk && implementsOk;
+  }
+
+  /**
+   * @return Returns how often ch occurs in str
+   */
+  private static int countOccurrences(final String str, final char ch) {
+    int count = 0;
+    for (int pos = str.indexOf(ch); pos != -1; pos = str.indexOf(ch, pos + 1)) {
+      count++;
+    }
+    return count;
+  }
+
+  /**
+   * @return Returns true if clazz is neither abstract nor an interface
+   */
+  private static boolean isMaterial(final Class clazz) {
+    final int mod = clazz.getModifiers();
+    return !Modifier.isAbstract(mod) && !Modifier.isInterface(mod);
+  }
+
+  /**
+   * @return Returns true if clazz extends superClassName, directly or indirectly
+   */
+  private static boolean isExtending(final Class clazz,
+      final String superClassName) {
+    for (Class ancestor = clazz.getSuperclass(); ancestor != null;
+        ancestor = ancestor.getSuperclass()) {
+      if (superClassName.equals(ancestor.getName())) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * @return Returns true if clazz itself declares interfaze; interfaces inherited
+   *         from a superclass or a super-interface are not considered
+   */
+  private static boolean isImplementing(final Class clazz, final Class interfaze) {
+    return Arrays.asList(clazz.getInterfaces()).contains(interfaze);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java
new file mode 100644
index 0000000..af266f3
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java
@@ -0,0 +1,581 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tools;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.regex.Pattern;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.gui.Tools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+
+/**
+ * Tools for reading files etc.
+ *
+ * @author Daniel Naber
+ */
+public final class StringTools {
+
+ /** Context size, in characters, used by getContext(int, int, String). */
+ private static final int DEFAULT_CONTEXT_SIZE = 25;
+
+ /**
+ * Constants for printing XML rule matches. They select which parts of the
+ * XML document (preamble and opening root element, closing root element)
+ * ruleMatchesToXML emits around the match elements.
+ */
+ public static enum XmlPrintMode {
+ /**
+ * Normally output the rule matches by starting and
+ * ending the XML output on every call.
+ */
+ NORMAL_XML,
+ /**
+ * Start XML output by printing the preamble and the
+ * start of the root element.
+ */
+ START_XML,
+ /**
+ * End XML output by closing the root element.
+ */
+ END_XML,
+ /**
+ * Simply continue rule match output, printing neither the
+ * preamble nor the closing root element.
+ */
+ CONTINUE_XML
+ }
+
+ // Matches an XML comment; DOTALL lets it span lines, and the reluctant
+ // quantifier stops at the first closing "-->".
+ private static final Pattern XML_COMMENT_PATTERN = Pattern.compile("<!--.*?-->", Pattern.DOTALL);
+ // Matches a tag-like span "<...>" that contains no further angle brackets
+ // and is not directly preceded by another "<". Used by filterXML.
+ private static final Pattern XML_PATTERN = Pattern.compile("(?<!<)<[^<>]+>", Pattern.DOTALL);
+
+
+ /** Not instantiable: this class only offers static helpers. */
+ private StringTools() {
+ // only static stuff
+ }
+
+  /**
+   * Verifies that a string value has been set.
+   *
+   * @param s the value to check
+   * @param varName name of the checked variable, used in the exception message
+   * @throws NullPointerException if s is null
+   * @throws IllegalArgumentException if s is empty or consists of whitespace only
+   */
+  public static void assureSet(final String s, final String varName) {
+    if (s == null) {
+      throw new NullPointerException(varName + " cannot be null");
+    }
+    final boolean blank = s.trim().length() == 0;
+    if (blank) {
+      throw new IllegalArgumentException(varName
+          + " cannot be empty or whitespace only");
+    }
+  }
+
+  /**
+   * Reads a stream's content as text without naming an encoding; delegates to
+   * {@link #readFile(InputStream, String)} with a null encoding.
+   *
+   * @param file InputStream to a file to be read
+   * @return the content, lines separated by <code>\n</code>
+   * @throws IOException if reading fails
+   */
+  public static String readFile(final InputStream file) throws IOException {
+    final String noEncoding = null;
+    return readFile(file, noEncoding);
+  }
+
+  /**
+   * Reads a text stream line by line using the given encoding. The stream is
+   * closed when reading is done.
+   *
+   * @param file
+   *          InputStream to a file to be read
+   * @param encoding
+   *          the file's character encoding (e.g. <code>iso-8859-1</code>), or
+   *          null to use the reader's default encoding
+   * @return a string with the file's content, every line terminated by
+   *         <code>\n</code>
+   * @throws IOException if reading fails or the encoding is not supported
+   */
+  public static String readFile(final InputStream file, final String encoding)
+      throws IOException {
+    final StringBuilder content = new StringBuilder();
+    InputStreamReader streamReader = null;
+    BufferedReader lineReader = null;
+    try {
+      streamReader = (encoding == null)
+          ? new InputStreamReader(file)
+          : new InputStreamReader(file, encoding);
+      lineReader = new BufferedReader(streamReader);
+      for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
+        content.append(line).append('\n');
+      }
+    } finally {
+      if (lineReader != null) {
+        lineReader.close();
+      }
+      if (streamReader != null) {
+        streamReader.close();
+      }
+    }
+    return content.toString();
+  }
+
+  /**
+   * Tells whether upper-casing <code>str</code> leaves it unchanged, i.e. it
+   * contains no characters that have a distinct uppercase form.
+   *
+   * @param str the string to check
+   * @return true if <code>str</code> equals its uppercase version
+   */
+  public static boolean isAllUppercase(final String str) {
+    final String upper = str.toUpperCase();
+    return upper.equals(str);
+  }
+
+  /**
+   * Tells whether a string is mixed case: it is not all uppercase, not a
+   * capitalized word and not all lowercase.
+   *
+   * @param str - input str
+   * @return true if str is MixedCase.
+   */
+  public static boolean isMixedCase(final String str) {
+    if (isAllUppercase(str)) {
+      return false;
+    }
+    if (isCapitalizedWord(str)) {
+      return false;
+    }
+    return !str.equals(str.toLowerCase());
+  }
+
+  /**
+   * Tells whether a string starts with an uppercase character and the rest of
+   * it is unchanged by lower-casing.
+   *
+   * @param str - input string, may be null or empty
+   * @return true if str is capitalized; false for null or empty input
+   */
+  public static boolean isCapitalizedWord(final String str) {
+    if (str == null || str.length() == 0) {
+      return false;
+    }
+    if (!Character.isUpperCase(str.charAt(0))) {
+      return false;
+    }
+    final String rest = str.substring(1);
+    return rest.equals(rest.toLowerCase());
+  }
+
+  /**
+   * Tells whether the first character of <code>str</code> is an uppercase
+   * character.
+   *
+   * @param str the string to check, may be null or empty
+   * @return true if the first character is uppercase; false for null or empty
+   *         input
+   */
+  public static boolean startsWithUppercase(final String str) {
+    return str != null
+        && str.length() != 0
+        && Character.isUpperCase(str.charAt(0));
+  }
+
+  /**
+   * Returns <code>str</code> with its first character turned into uppercase.
+   * Leading characters that are neither letters nor digits, such as quotes or
+   * parentheses, are skipped when looking for that first character.
+   *
+   * @param str the string to change, may be null or empty
+   * @return the changed string
+   */
+  public static String uppercaseFirstChar(final String str) {
+    final boolean toUpperCase = true;
+    return changeFirstCharCase(str, toUpperCase);
+  }
+
+  /**
+   * Returns <code>str</code> with its first character turned into lowercase.
+   * Leading characters that are neither letters nor digits, such as quotes or
+   * parentheses, are skipped when looking for that first character.
+   *
+   * @param str the string to change, may be null or empty
+   * @return the changed string
+   */
+  public static String lowercaseFirstChar(final String str) {
+    final boolean toUpperCase = false;
+    return changeFirstCharCase(str, toUpperCase);
+  }
+
+  /**
+   * Returns <code>str</code> with the case of its first character changed.
+   * Leading characters that are neither letters nor digits (quotes,
+   * parentheses, ...) are skipped; if there is no letter or digit at all, the
+   * last character is the one that gets converted.
+   *
+   * @param str the string to change; null and empty strings are returned as is
+   * @param toUpperCase true to convert to uppercase, false for lowercase
+   * @return the changed string
+   */
+  private static String changeFirstCharCase(final String str, final boolean toUpperCase) {
+    if (str == null || str.length() == 0) {
+      return str;
+    }
+    if (str.length() == 1) {
+      // whole-string conversion, so one character may expand to several
+      if (toUpperCase) {
+        return str.toUpperCase();
+      }
+      return str.toLowerCase();
+    }
+    final int lastIdx = str.length() - 1;
+    int idx = 0;
+    while (idx < lastIdx && !Character.isLetterOrDigit(str.charAt(idx))) {
+      idx++;
+    }
+    final char target = str.charAt(idx);
+    final char converted;
+    if (toUpperCase) {
+      converted = Character.toUpperCase(target);
+    } else {
+      converted = Character.toLowerCase(target);
+    }
+    final StringBuilder result = new StringBuilder(str.length());
+    result.append(str.substring(0, idx));
+    result.append(converted);
+    result.append(str.substring(idx + 1));
+    return result.toString();
+  }
+
+  /**
+   * Reads everything a reader delivers into a string. The reader is not
+   * closed.
+   *
+   * @param reader the reader to drain
+   * @return all characters read
+   * @throws IOException if reading fails
+   */
+  public static String readerToString(final Reader reader) throws IOException {
+    final StringBuilder content = new StringBuilder();
+    final char[] buffer = new char[4000];
+    int count = reader.read(buffer, 0, 4000);
+    while (count > 0) {
+      content.append(buffer, 0, count);
+      count = reader.read(buffer, 0, 4000);
+    }
+    return content.toString();
+  }
+
+  /**
+   * Reads a whole stream into a string, decoding it with the default encoding
+   * of <code>InputStreamReader</code>. The stream is closed afterwards.
+   *
+   * @param is the stream to read
+   * @return the decoded content
+   * @throws IOException if reading fails
+   */
+  public static String streamToString(final InputStream is) throws IOException {
+    final InputStreamReader reader = new InputStreamReader(is);
+    final String content;
+    try {
+      content = readerToString(reader);
+    } finally {
+      reader.close();
+    }
+    return content;
+  }
+
+  /**
+   * Escapes a string for use in XML; simply delegates to
+   * {@link #escapeHTML(String)}.
+   *
+   * @param s the string to escape
+   * @return the escaped string
+   */
+  public static String escapeXML(final String s) {
+    final String escaped = escapeHTML(s);
+    return escaped;
+  }
+
+  /**
+   * Replaces the characters less than, greater than, ampersand and double
+   * quote by their named entities.
+   *
+   * @param s the string to escape
+   * @return the escaped string
+   */
+  public static String escapeHTML(final String s) {
+    // a single pass over the characters is much faster than s.replaceAll
+    final StringBuilder escaped = new StringBuilder();
+    for (final char c : s.toCharArray()) {
+      if (c == '<') {
+        escaped.append("&lt;");
+      } else if (c == '>') {
+        escaped.append("&gt;");
+      } else if (c == '&') {
+        escaped.append("&amp;");
+      } else if (c == '"') {
+        escaped.append("&quot;");
+      } else {
+        escaped.append(c);
+      }
+    }
+    return escaped.toString();
+  }
+
+  /**
+   * Get an XML representation of the given rule matches as one complete
+   * document (mode {@link XmlPrintMode#NORMAL_XML}).
+   *
+   * @param ruleMatches
+   *          the matches to serialize
+   * @param text
+   *          the original text that was checked, used to get the context of the
+   *          matches
+   * @param contextSize
+   *          the desired context size in characters
+   * @return the XML document
+   * @deprecated Use {@link #ruleMatchesToXML(List,String,int,XmlPrintMode)} instead
+   */
+  public static String ruleMatchesToXML(final List<RuleMatch> ruleMatches,
+      final String text, final int contextSize) {
+    final XmlPrintMode completeDocument = XmlPrintMode.NORMAL_XML;
+    return ruleMatchesToXML(ruleMatches, text, contextSize, completeDocument);
+  }
+
+ /**
+ * Get an XML representation of the given rule matches. Every match becomes
+ * one <code>error</code> element on a single line; depending on
+ * <code>xmlMode</code> the XML preamble with the opening
+ * <code>matches</code> element and/or the closing <code>matches</code>
+ * element are added around them.
+ *
+ * @param ruleMatches
+ * the matches to serialize
+ * @param text
+ * the original text that was checked, used to get the context of the
+ * matches
+ * @param contextSize
+ * the desired context size in characters
+ * @param xmlMode how to print the XML
+ * @return the XML text
+ */
+ public static String ruleMatchesToXML(final List<RuleMatch> ruleMatches,
+ final String text, final int contextSize, final XmlPrintMode xmlMode) {
+ //
+ // IMPORTANT: people rely on this format, don't change it!
+ //
+ final StringBuilder xml = new StringBuilder();
+
+ // preamble and root element start only for a complete or a starting document
+ if (xmlMode == XmlPrintMode.NORMAL_XML || xmlMode == XmlPrintMode.START_XML) {
+ xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+ xml.append("<matches>\n");
+ }
+
+ for (final RuleMatch match : ruleMatches) {
+ // the subId attribute is only written for pattern rules that have one
+ String subId = "";
+ if (match.getRule() instanceof PatternRule) {
+ final PatternRule pRule = (PatternRule) match.getRule();
+ if (pRule.getSubId() != null) {
+ subId = " subId=\"" + escapeXMLForAPIOutput(pRule.getSubId()) + "\" ";
+ }
+ }
+ // columns are written minus one, lines as delivered by the match
+ // NOTE(review): the rule id is written without escaping - presumably ids
+ // never contain XML special characters; confirm
+ xml.append("<error" + " fromy=\"" + match.getLine() + "\"" + " fromx=\""
+ + (match.getColumn() - 1) + "\"" + " toy=\"" + match.getEndLine() + "\""
+ + " tox=\"" + (match.getEndColumn() - 1) + "\"" + " ruleId=\""
+ + match.getRule().getId() + "\"");
+ // suggestion markup in the message is replaced by plain quotes
+ final String msg = match.getMessage().replaceAll("</?suggestion>", "'");
+ xml.append(subId);
+ xml.append(" msg=\"" + escapeXMLForAPIOutput(msg) + "\"");
+ // an artificial marker is put into the context so that the offset of the
+ // error inside the context can be found below
+ final String START_MARKER = "__languagetool_start_marker";
+ String context = Tools.getContext(match.getFromPos(), match.getToPos(),
+ text, contextSize, START_MARKER, "", true);
+ // the suggested replacements are joined with "#"
+ xml.append(" replacements=\""
+ + escapeXMLForAPIOutput(listToString(
+ match.getSuggestedReplacements(), "#")) + "\"");
+ // get position of error in context and remove artificial marker again:
+ final int contextOffset = context.indexOf(START_MARKER);
+ context = context.replaceFirst(START_MARKER, "");
+ context = context.replaceAll("[\n\r]", " ");
+ // NOTE(review): context is not passed through escapeXMLForAPIOutput here;
+ // presumably Tools.getContext already escapes it when its last argument
+ // is true - confirm
+ xml.append(" context=\"" + context + "\"");
+ xml.append(" contextoffset=\"" + contextOffset + "\"");
+ xml.append(" errorlength=\"" + (match.getToPos() - match.getFromPos())
+ + "\"");
+ xml.append("/>\n");
+ }
+ // root element end only for a complete or an ending document
+ if (xmlMode == XmlPrintMode.END_XML || xmlMode == XmlPrintMode.NORMAL_XML) {
+ xml.append("</matches>\n");
+ }
+ return xml.toString();
+ }
+
+  /**
+   * Escapes a string for the simplified XML output, in which every
+   * "&lt;error&gt;" element has to stay on one line: the string is XML-escaped
+   * and every line feed or carriage return is turned into a space.
+   *
+   * @param s the string to escape
+   * @return the escaped single-line string
+   */
+  private static String escapeXMLForAPIOutput(final String s) {
+    final String escaped = escapeXML(s);
+    return escaped.replaceAll("[\n\r]", " ");
+  }
+
+  /**
+   * Joins the elements of a collection, in iteration order, into one string.
+   *
+   * @param l the strings to join
+   * @param delimiter the text put between two neighbouring elements
+   * @return the joined string, empty for an empty collection
+   */
+  public static String listToString(final Collection<String> l, final String delimiter) {
+    final StringBuilder joined = new StringBuilder();
+    boolean first = true;
+    for (final String item : l) {
+      if (!first) {
+        joined.append(delimiter);
+      }
+      joined.append(item);
+      first = false;
+    }
+    return joined.toString();
+  }
+
+  /**
+   * Returns the context of a text span together with a marker line, using the
+   * default context size of this class.
+   *
+   * @param fromPos start of the span (inclusive)
+   * @param toPos end of the span (exclusive)
+   * @param fileContents the text the span refers to
+   * @return the context line and the marker line, separated by a line feed
+   */
+  public static String getContext(final int fromPos, final int toPos,
+      final String fileContents) {
+    final int contextSize = DEFAULT_CONTEXT_SIZE;
+    return getContext(fromPos, toPos, fileContents, contextSize);
+  }
+
+  /**
+   * Returns the context of a text span together with a marker line. The result
+   * consists of two lines: the text from <code>contextSize</code> characters
+   * before the span to <code>contextSize</code> characters after it (line
+   * feeds replaced by spaces, "..." added on a side where text was cut off),
+   * and below it a line that marks the span with "^" characters.
+   *
+   * @param fromPos start of the span (inclusive)
+   * @param toPos end of the span (exclusive)
+   * @param contents the text the span refers to
+   * @param contextSize number of characters to show on each side of the span
+   * @return the context line and the marker line, separated by a line feed
+   */
+  public static String getContext(final int fromPos, final int toPos,
+      final String contents, final int contextSize) {
+    final String fileContents = contents.replace('\n', ' ');
+    // calculate context region:
+    int startContent = fromPos - contextSize;
+    String prefix = "...";
+    String postfix = "...";
+    // must be exactly as wide as prefix, otherwise the "^" markers are shifted
+    // against the text line they belong to
+    String markerPrefix = "   ";
+    if (startContent < 0) {
+      prefix = "";
+      markerPrefix = "";
+      startContent = 0;
+    }
+    int endContent = toPos + contextSize;
+    if (endContent > fileContents.length()) {
+      postfix = "";
+      endContent = fileContents.length();
+    }
+    // build context string plus marker:
+    final StringBuilder sb = new StringBuilder();
+    sb.append(prefix);
+    sb.append(fileContents.substring(startContent, endContent));
+    sb.append(postfix);
+    sb.append('\n');
+    sb.append(markerPrefix);
+    // only the visible window needs a marker, not the whole text
+    for (int i = startContent; i < endContent; i++) {
+      sb.append(i >= fromPos && i < toPos ? '^' : ' ');
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Removes every line feed, space and tab character from a string, wherever
+   * it occurs. Useful for trimming the contents of token elements that cannot
+   * possibly contain any spaces.
+   *
+   * @param str
+   *          String to be filtered.
+   * @return Filtered string.
+   */
+  public static String trimWhitespace(final String str) {
+    final StringBuilder kept = new StringBuilder();
+    for (final char c : str.toCharArray()) {
+      switch (c) {
+        case '\n':
+        case ' ':
+        case '\t':
+          // dropped
+          break;
+        default:
+          kept.append(c);
+          break;
+      }
+    }
+    return kept.toString();
+  }
+
+  /**
+   * Tells which separator belongs in front of a word: a space for normal
+   * words, nothing for punctuation that is attached to the preceding word.
+   *
+   * @param word
+   *          Word to add the preceding space.
+   * @param language
+   *          Language of the word (to check typography conventions). For French
+   *          only '.' and ',' get no space; for all other languages no space is
+   *          added before any of ,.;:!?
+   * @return String containing a space or an empty string.
+   */
+  public static String addSpace(final String word, final Language language) {
+    if (word.length() != 1) {
+      return " ";
+    }
+    final char c = word.charAt(0);
+    final boolean attached;
+    if (Language.FRENCH.equals(language)) {
+      attached = c == '.' || c == ',';
+    } else {
+      attached = ".,;:?!".indexOf(c) >= 0;
+    }
+    return attached ? "" : " ";
+  }
+
+  /**
+   * Returns translation of the UI element without the control character "&amp;".
+   * To have "&amp;" in the UI, use "&amp;&amp;".
+   *
+   * @param label
+   *          Label to convert.
+   * @return String UI element string without mnemonics.
+   */
+  public static String getLabel(final String label) {
+    // first drop the "&" in front of every non-"&" character ...
+    final String withoutMarkers = label.replaceAll("&([^&])", "$1");
+    // ... then collapse the remaining doubled ampersands
+    return withoutMarkers.replaceAll("&&", "&");
+  }
+
+  /**
+   * Returns the UI element string with mnemonics encoded in OpenOffice.org
+   * convention (using "~"). A single "&amp;" in front of a character becomes
+   * "~"; a doubled "&amp;&amp;" stands for a literal ampersand and becomes
+   * "&amp;". A lone "&amp;" at the very end of the label is kept as is.
+   *
+   * The label is converted in one left-to-right pass. Replacing markers first
+   * and doubled ampersands afterwards would mistake the second half of
+   * "&amp;&amp;" for a mnemonic marker (e.g. "Save &amp;&amp; Exit" would
+   * become "Save &amp;~ Exit").
+   *
+   * @param label
+   *          Label to convert
+   * @return String UI element with ~ replacing &amp;.
+   */
+  public static String getOOoLabel(final String label) {
+    final int len = label.length();
+    final StringBuilder converted = new StringBuilder(len);
+    for (int i = 0; i < len; i++) {
+      final char c = label.charAt(i);
+      if (c != '&' || i + 1 >= len) {
+        converted.append(c);
+      } else if (label.charAt(i + 1) == '&') {
+        // escaped ampersand: emit one and skip its partner
+        converted.append('&');
+        i++;
+      } else {
+        // mnemonic marker; the marked character is copied by the next iteration
+        converted.append('~');
+      }
+    }
+    return converted.toString();
+  }
+
+  /**
+   * Returns mnemonic of a UI element, i.e. the character that follows the
+   * first single "&amp;" of the label. Doubled ampersands ("&amp;&amp;") stand
+   * for a literal ampersand and are skipped, no matter how many of them come
+   * before the mnemonic marker.
+   *
+   * @param label
+   *          String Label of the UI element
+   * @return Mnemonic of the UI element, or the NUL character (code 0) if no
+   *         mnemonic is set; a lone "&amp;" at the very end of the label does
+   *         not count as a mnemonic marker.
+   */
+  public static char getMnemonic(final String label) {
+    final int len = label.length();
+    int pos = label.indexOf('&');
+    // stop as soon as there is no "&" left that still has a character after it
+    while (pos != -1 && pos + 1 < len) {
+      final char next = label.charAt(pos + 1);
+      if (next != '&') {
+        return next;
+      }
+      // "&&" is an escaped ampersand: continue behind the pair
+      pos = label.indexOf('&', pos + 2);
+    }
+    return '\u0000';
+  }
+
+  /**
+   * Checks if a string contains only whitespace, including all Unicode
+   * whitespace. The two field placeholder characters used by OpenOffice.org
+   * are never treated as whitespace.
+   *
+   * @param str
+   *          String to check
+   * @return true if the string is whitespace-only.
+   */
+  public static boolean isWhitespace(final String str) {
+    // breakable field in OOo / unbreakable field, e.g. a footnote number in OOo
+    if ("\u0001".equals(str) || "\u0002".equals(str)) {
+      return false;
+    }
+    final String trimmed = str.trim();
+    switch (trimmed.length()) {
+      case 0:
+        return true;
+      case 1:
+        // trim() only strips characters up to the space character, so other
+        // Unicode whitespace is still there and has to be tested explicitly
+        return Character.isWhitespace(trimmed.charAt(0));
+      default:
+        return false;
+    }
+  }
+
+  /**
+   * Tells whether a character is a non-zero ASCII digit.
+   *
+   * @param ch
+   *          Character to check
+   * @return True if the character is a positive number (decimal digit from 1 to
+   *         9).
+   */
+  public static boolean isPositiveNumber(final char ch) {
+    if (ch < '1') {
+      return false;
+    }
+    return ch <= '9';
+  }
+
+  /**
+   * Null-safe replacement for calls to "".equals().
+   *
+   * @param str
+   *          String to check
+   * @return true if string is empty OR null
+   */
+  public static boolean isEmpty(final String str) {
+    if (str == null) {
+      return true;
+    }
+    return str.length() == 0;
+  }
+
+  /**
+   * Simple XML filtering routine: every XML comment is replaced by a single
+   * space, then every remaining tag is removed.
+   *
+   * @param str XML string to be filtered.
+   * @return Filtered string without XML tags.
+   */
+  public static String filterXML(final String str) {
+    final String withoutComments = XML_COMMENT_PATTERN.matcher(str).replaceAll(" ");
+    return XML_PATTERN.matcher(withoutComments).replaceAll("");
+  }
+
+  /**
+   * Null-safe conversion of a character sequence to a string.
+   *
+   * @param s the sequence to convert, may be null
+   * @return the result of <code>s.toString()</code>, or null if s is null
+   */
+  public static String asString(final CharSequence s) {
+    return s == null ? null : s.toString();
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java
new file mode 100644
index 0000000..6a6432a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java
@@ -0,0 +1,37 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tools;
+
+/**
+ * Helper class for GenericUnpairedBracketsRule to identify
+ * symbols indexed with integers. A plain holder that pairs a symbol
+ * with an index; both fields are public and mutable.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public class SymbolLocator {
+ /** The symbol being tracked. */
+ public String symbol;
+ /** The integer index associated with the symbol. */
+ public int index;
+
+ /**
+ * Creates a locator for the given symbol and index.
+ *
+ * @param symbol the symbol to store
+ * @param index the index to store with it
+ */
+ public SymbolLocator(final String symbol, final int index) {
+ this.symbol = symbol;
+ this.index = index;
+ }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java
new file mode 100644
index 0000000..c5d1984
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java
@@ -0,0 +1,626 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tools;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.lang.reflect.Constructor;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.bitext.BitextReader;
+import de.danielnaber.languagetool.bitext.StringPair;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.bitext.BitextRule;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRuleLoader;
+import de.danielnaber.languagetool.rules.patterns.bitext.FalseFriendsAsBitextLoader;
+import de.danielnaber.languagetool.tools.StringTools.XmlPrintMode;
+
+public final class Tools {
+
+ // Context size that checkText substitutes when -1 is passed; also used
+ // by the checkBitext variants that print their results.
+ private static final int DEFAULT_CONTEXT_SIZE = 45;
+
+ /** Not instantiable: this class only offers static helpers. */
+ private Tools() {
+ // cannot construct, static methods only
+ }
+
+  /**
+   * Tags text using the LanguageTool tagger: the text is split into
+   * sentences and the analysis of every sentence is printed to System.out.
+   *
+   * @param contents
+   *          Text to tag.
+   * @param lt
+   *          LanguageTool instance
+   * @throws IOException
+   */
+  public static void tagText(final String contents, final JLanguageTool lt)
+      throws IOException {
+    for (final String sentence : lt.sentenceTokenize(contents)) {
+      final AnalyzedSentence analyzed = lt.getAnalyzedSentence(sentence);
+      System.out.println(analyzed.toString());
+    }
+  }
+
+  /**
+   * Checks the given text and prints the matches to System.out as plain text,
+   * with the default context size and without a line offset.
+   *
+   * @param contents a text to check
+   * @param lt Initialized LanguageTool
+   * @return Number of rule matches to the input text.
+   * @throws IOException
+   */
+  public static int checkText(final String contents, final JLanguageTool lt)
+      throws IOException {
+    final boolean apiFormat = false;
+    final int lineOffset = 0;
+    return checkText(contents, lt, apiFormat, -1, lineOffset, 0, XmlPrintMode.NORMAL_XML);
+  }
+
+  /**
+   * Checks the given text and prints the matches to System.out as plain text,
+   * with the default context size.
+   *
+   * @param contents a text to check
+   * @param lt Initialized LanguageTool
+   * @param lineOffset line number offset to be added to line numbers in matches
+   * @return Number of rule matches to the input text.
+   * @throws IOException
+   */
+  public static int checkText(final String contents, final JLanguageTool lt, final int lineOffset)
+      throws IOException {
+    final boolean apiFormat = false;
+    return checkText(contents, lt, apiFormat, -1, lineOffset, 0, XmlPrintMode.NORMAL_XML);
+  }
+
+  /**
+   * Checks the given text and prints the matches to System.out, with the
+   * default context size.
+   *
+   * @param contents a text to check
+   * @param lt Initialized LanguageTool
+   * @param apiFormat whether to print the result in a simple XML format
+   * @param lineOffset line number offset to be added to line numbers in matches
+   * @return Number of rule matches to the input text.
+   * @throws IOException
+   */
+  public static int checkText(final String contents, final JLanguageTool lt,
+      final boolean apiFormat, final int lineOffset) throws IOException {
+    final int defaultContextSize = -1;
+    return checkText(contents, lt, apiFormat, defaultContextSize, lineOffset, 0,
+        XmlPrintMode.NORMAL_XML);
+  }
+
+  /**
+   * Check the given text and print results to System.out. Time statistics are
+   * printed as well when <code>xmlMode</code> is NORMAL_XML.
+   *
+   * @param contents
+   *          a text to check (may be more than one sentence)
+   * @param lt
+   *          Initialized LanguageTool
+   * @param apiFormat
+   *          whether to print the result in a simple XML format
+   * @param contextSize
+   *          error text context size: -1 for default
+   * @param lineOffset
+   *          line number offset to be added to line numbers in matches
+   * @param prevMatches
+   *          number of previously matched rules
+   * @param xmlMode
+   *          mode of xml printout for simple xml output
+   * @return
+   *          Number of rule matches to the input text.
+   * @throws IOException
+   */
+  public static int checkText(final String contents, final JLanguageTool lt,
+      final boolean apiFormat, int contextSize, final int lineOffset,
+      final int prevMatches, final XmlPrintMode xmlMode) throws IOException {
+    final int effectiveContextSize =
+        (contextSize == -1) ? DEFAULT_CONTEXT_SIZE : contextSize;
+    final long startTime = System.currentTimeMillis();
+    final List<RuleMatch> ruleMatches = lt.check(contents);
+    // shift the line numbers of all matches by the offset
+    for (final RuleMatch ruleMatch : ruleMatches) {
+      ruleMatch.setLine(ruleMatch.getLine() + lineOffset);
+      ruleMatch.setEndLine(ruleMatch.getEndLine() + lineOffset);
+    }
+    if (!apiFormat) {
+      printMatches(ruleMatches, prevMatches, contents, effectiveContextSize);
+    } else {
+      final PrintStream out = new PrintStream(System.out, true, "UTF-8");
+      out.print(StringTools.ruleMatchesToXML(ruleMatches, contents,
+          effectiveContextSize, xmlMode));
+    }
+
+    // display stats if it's not in a buffered mode
+    final boolean unbuffered = xmlMode == StringTools.XmlPrintMode.NORMAL_XML;
+    if (unbuffered) {
+      displayTimeStats(startTime, lt.getSentenceCount(), apiFormat);
+    }
+    return ruleMatches.size();
+  }
+
+  /**
+   * Prints to System.out how long a check took and how many sentences per
+   * second were processed. In API format the line is wrapped into an XML
+   * comment.
+   *
+   * @param startTime start of the check, as delivered by System.currentTimeMillis()
+   * @param sentCount number of sentences that were checked
+   * @param apiFormat whether the output is XML
+   */
+  private static void displayTimeStats(final long startTime,
+      final long sentCount, final boolean apiFormat) {
+    final long elapsedMillis = System.currentTimeMillis() - startTime;
+    final float elapsedSeconds = elapsedMillis / 1000.0f;
+    final float sentencesPerSecond = sentCount / elapsedSeconds;
+    if (apiFormat) {
+      System.out.println("<!--");
+    }
+    System.out.printf(Locale.ENGLISH,
+        "Time: %dms for %d sentences (%.1f sentences/sec)",
+        elapsedMillis, sentCount, sentencesPerSecond);
+    System.out.println();
+    if (apiFormat) {
+      System.out.println("-->");
+    }
+  }
+
+  /**
+   * Displays matches in a simple text format on System.out: a numbered header
+   * with position and rule id, the message, the suggestions (if any) and the
+   * context. Matches are separated by an empty line.
+   * @param ruleMatches Matches from rules.
+   * @param prevMatches Number of previously found matches.
+   * @param contents The text that was checked.
+   * @param contextSize The size of contents displayed.
+   * @since 1.0.1
+   */
+  private static void printMatches(final List<RuleMatch> ruleMatches,
+      final int prevMatches, final String contents, final int contextSize) {
+    final int total = ruleMatches.size();
+    int current = 0;
+    for (final RuleMatch match : ruleMatches) {
+      current++;
+      final StringBuilder header = new StringBuilder();
+      header.append(current + prevMatches);
+      header.append(".) Line ").append(match.getLine() + 1);
+      header.append(", column ").append(match.getColumn());
+      header.append(", Rule ID: ").append(match.getRule().getId());
+      if (match.getRule() instanceof PatternRule) {
+        final PatternRule patternRule = (PatternRule) match.getRule();
+        header.append("[").append(patternRule.getSubId()).append("]");
+      }
+      System.out.println(header.toString());
+      // suggestion markup is shown as plain quotes
+      final String message = match.getMessage()
+          .replace("<suggestion>", "'")
+          .replace("</suggestion>", "'");
+      System.out.println("Message: " + message);
+      final List<String> replacements = match.getSuggestedReplacements();
+      if (!replacements.isEmpty()) {
+        System.out.println("Suggestion: "
+            + StringTools.listToString(replacements, "; "));
+      }
+      System.out.println(StringTools.getContext(match.getFromPos(),
+          match.getToPos(), contents, contextSize));
+      // empty line between matches, but not after the last one
+      if (current < total) {
+        System.out.println();
+      }
+    }
+  }
+
+  /**
+   * Checks the bilingual input (bitext) and displays the output (considering the target
+   * language) in API format or in the simple text format.
+   *
+   * NOTE: the positions returned by the rule matches are relative
+   * to the target string only, and always start at the first line
+   * and first column, no matter how many lines were checked before.
+   * To have multiple lines taken into account, use the checkBitext
+   * method that takes a BitextReader.
+   *
+   * @param src Source text.
+   * @param trg Target text.
+   * @param srcLt Source JLanguageTool (used to analyze the text).
+   * @param trgLt Target JLanguageTool (used to analyze the text).
+   * @param bRules Bilingual rules used in addition to target standard rules.
+   * @param apiFormat Whether API format should be used.
+   * @param xmlMode The mode of XML output display.
+   * @return The number of rules matched on the bitext.
+   * @throws IOException
+   * @since 1.0.1
+   */
+  public static int checkBitext(final String src, final String trg,
+      final JLanguageTool srcLt, final JLanguageTool trgLt,
+      final List<BitextRule> bRules,
+      final boolean apiFormat, final XmlPrintMode xmlMode) throws IOException {
+    final long startTime = System.currentTimeMillis();
+    final int contextSize = DEFAULT_CONTEXT_SIZE;
+    final List<RuleMatch> rawMatches =
+        checkBitext(src, trg, srcLt, trgLt, bRules);
+    // Keep the matches returned by adjustRuleMatchPos: assigning them to the
+    // loop variable would throw the adjusted matches away. This mirrors what
+    // the BitextReader variant of this method does.
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(rawMatches.size());
+    for (final RuleMatch rawMatch : rawMatches) {
+      ruleMatches.add(trgLt.adjustRuleMatchPos(rawMatch, 0, 1, 1, trg));
+    }
+    if (apiFormat) {
+      final String xml = StringTools.ruleMatchesToXML(ruleMatches, trg,
+          contextSize, xmlMode);
+      final PrintStream out = new PrintStream(System.out, true, "UTF-8");
+      out.print(xml);
+    } else {
+      printMatches(ruleMatches, 0, trg, contextSize);
+    }
+    //display stats if it's not in a buffered mode
+    if (xmlMode == StringTools.XmlPrintMode.NORMAL_XML) {
+      displayTimeStats(startTime, srcLt.getSentenceCount(), apiFormat);
+    }
+    return ruleMatches.size();
+  }
+
+  /**
+   * Checks the bilingual input (bitext) and displays the output (considering the target
+   * language) in API format or in the simple text format.
+   *
+   * NOTE: the positions returned by the rule matches are adjusted
+   * according to the data returned by the reader.
+   *
+   * In API format the output is always a complete XML document: if no pair
+   * produced a match, the preamble and the opening root element are printed
+   * before the time statistics and the closing root element.
+   *
+   * @param reader Reader of bitext strings.
+   * @param srcLt Source JLanguageTool (used to analyze the text).
+   * @param trgLt Target JLanguageTool (used to analyze the text).
+   * @param bRules Bilingual rules used in addition to target standard rules.
+   * @param apiFormat Whether API format should be used.
+   * @return The number of rules matched on the bitext.
+   * @throws IOException
+   * @since 1.0.1
+   */
+  public static int checkBitext(final BitextReader reader,
+      final JLanguageTool srcLt, final JLanguageTool trgLt,
+      final List<BitextRule> bRules,
+      final boolean apiFormat) throws IOException {
+    final long startTime = System.currentTimeMillis();
+    final int contextSize = DEFAULT_CONTEXT_SIZE;
+    // one UTF-8 stream for all XML output instead of a new one per print
+    final PrintStream xmlOut =
+        apiFormat ? new PrintStream(System.out, true, "UTF-8") : null;
+    XmlPrintMode xmlMode = StringTools.XmlPrintMode.START_XML;
+    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+    int matchCount = 0;
+    int sentCount = 0;
+    for (StringPair srcAndTrg : reader) {
+      final List<RuleMatch> curMatches = checkBitext(
+          srcAndTrg.getSource(), srcAndTrg.getTarget(),
+          srcLt, trgLt, bRules);
+      final List<RuleMatch> fixedMatches = new ArrayList<RuleMatch>();
+      for (RuleMatch thisMatch : curMatches) {
+        fixedMatches.add(
+            trgLt.adjustRuleMatchPos(thisMatch,
+                reader.getSentencePosition(),
+                reader.getColumnCount(),
+                reader.getLineCount(),
+                reader.getCurrentLine()));
+      }
+      ruleMatches.addAll(fixedMatches);
+      if (fixedMatches.size() > 0) {
+        if (apiFormat) {
+          final String xml = StringTools.ruleMatchesToXML(fixedMatches,
+              reader.getCurrentLine(),
+              contextSize, xmlMode);
+          // the preamble is part of the first chunk only
+          if (xmlMode == StringTools.XmlPrintMode.START_XML) {
+            xmlMode = StringTools.XmlPrintMode.CONTINUE_XML;
+          }
+          xmlOut.print(xml);
+        } else {
+          printMatches(fixedMatches, matchCount, reader.getCurrentLine(), contextSize);
+          matchCount += fixedMatches.size();
+        }
+      }
+      sentCount++;
+    }
+    if (apiFormat && xmlMode == StringTools.XmlPrintMode.START_XML) {
+      // nothing matched, so the document start has not been printed yet;
+      // without it the closing root element below would be malformed XML
+      xmlOut.print(StringTools.ruleMatchesToXML(new ArrayList<RuleMatch>(), "",
+          contextSize, StringTools.XmlPrintMode.START_XML));
+    }
+    displayTimeStats(startTime, sentCount, apiFormat);
+    if (apiFormat) {
+      xmlOut.print("</matches>");
+    }
+    return ruleMatches.size();
+  }
+
+  /**
+   * Checks the bilingual input (bitext) and returns the matches: those of the
+   * target language's rules on the target text, followed by those of the
+   * bilingual rules on the source/target pair. Nothing is printed.
+   *
+   * @param src Source text.
+   * @param trg Target text.
+   * @param srcLt Source JLanguageTool (used to analyze the text).
+   * @param trgLt Target JLanguageTool (used to analyze the text).
+   * @param bRules Bilingual rules used in addition to target standard rules.
+   * @return The list of rule matches on the bitext.
+   * @throws IOException
+   * @since 1.0.1
+   */
+  public static List<RuleMatch> checkBitext(final String src, final String trg,
+      final JLanguageTool srcLt, final JLanguageTool trgLt,
+      final List<BitextRule> bRules) throws IOException {
+    final AnalyzedSentence analyzedSource = srcLt.getAnalyzedSentence(src);
+    final AnalyzedSentence analyzedTarget = trgLt.getAnalyzedSentence(trg);
+    final List<RuleMatch> matches = trgLt.checkAnalyzedSentence(
+        JLanguageTool.paragraphHandling.NORMAL,
+        trgLt.getAllRules(), 0, 0, 1, trg, analyzedTarget);
+    for (final BitextRule bitextRule : bRules) {
+      final RuleMatch[] found = bitextRule.match(analyzedSource, analyzedTarget);
+      if (found == null) {
+        continue;
+      }
+      for (final RuleMatch match : found) {
+        matches.add(match);
+      }
+    }
+    return matches;
+  }
+
+
+  /**
+   * Gets default bitext rules for a given pair of languages: the bitext
+   * pattern rules of the target language (if its rules directory has a
+   * bitext.xml), the false friend rules for the language pair, and the
+   * built-in Java bitext rules.
+   * @param source Source language.
+   * @param target Target language.
+   * @return List of Bitext rules
+   * @throws IOException
+   * @throws ParserConfigurationException
+   * @throws SAXException
+   */
+  public static List<BitextRule> getBitextRules(final Language source,
+      final Language target) throws IOException, ParserConfigurationException, SAXException {
+    final List<BitextRule> bRules = new ArrayList<BitextRule>();
+    //try to load the bitext pattern rules for the language...
+    final BitextPatternRuleLoader ruleLoader = new BitextPatternRuleLoader();
+    final String name = "/" + target.getShortName() + "/bitext.xml";
+    final InputStream is = JLanguageTool.getDataBroker().getFromRulesDirAsStream(name);
+    if (is != null) {
+      try {
+        bRules.addAll(ruleLoader.getRules(is, name));
+      } finally {
+        // the stream was opened here, so release it here as well, also when
+        // loading the rules fails
+        is.close();
+      }
+    }
+
+    //load the false friend rules in the bitext mode
+    final FalseFriendsAsBitextLoader fRuleLoader = new FalseFriendsAsBitextLoader();
+    final String fName = "/false-friends.xml";
+    bRules.addAll(fRuleLoader.
+        getFalseFriendsAsBitext(
+            JLanguageTool.getDataBroker().getRulesDir() + fName,
+            source, target));
+
+    //load Java bitext rules
+    // TODO: get ResourceBundle for possible parameters for rules
+    bRules.addAll(getAllBuiltinBitextRules(source, null));
+    return bRules;
+  }
+
+ /**
+  * Instantiates the built-in (non-pattern) bitext rules via reflection.
+  * <p>
+  * Classes are looked up in the <code>.bitext</code> sub-package of the package
+  * of {@link Rule}. For each class the first public constructor with one of
+  * the supported signatures is used: no arguments,
+  * <code>(ResourceBundle)</code>, or <code>(ResourceBundle, Language)</code>.
+  * Because the order in which constructors are reported is not specified, a
+  * class is only rejected after <em>all</em> of its public constructors have
+  * been tried. A class without any public constructor is skipped silently.
+  *
+  * @param language language passed to two-argument rule constructors
+  * @param messages message bundle passed to rule constructors, may be null
+  * @return the instantiated rules
+  * @throws RuntimeException if class lookup or instantiation fails, or if a
+  *         class has public constructors but none with a supported signature
+  */
+ private static List<BitextRule> getAllBuiltinBitextRules(final Language language,
+     final ResourceBundle messages) {
+   // use reflection to get a list of all non-pattern rules under
+   // "de.danielnaber.languagetool.rules.bitext"
+   // generic rules first, then language-specific ones
+   // TODO: the order of loading classes is not guaranteed so we may want to
+   // implement rule precedence
+
+   final List<BitextRule> rules = new ArrayList<BitextRule>();
+   try {
+     // we pass ".*Rule$" regexp to improve efficiency, see javadoc
+     final Class<?>[] classes = ReflectionUtils.findClasses(Rule.class
+         .getClassLoader(), Rule.class.getPackage().getName()
+         + ".bitext", ".*Rule$", 0,
+         Rule.class, null);
+
+     for (final Class<?> class1 : classes) {
+       final Constructor<?>[] constructors = class1.getConstructors();
+       boolean instantiated = false;
+       for (final Constructor<?> constructor : constructors) {
+         final Class<?>[] paramTypes = constructor.getParameterTypes();
+         if (paramTypes.length == 0) {
+           rules.add((BitextRule) constructor.newInstance());
+           instantiated = true;
+         } else if (paramTypes.length == 1
+             && paramTypes[0].equals(ResourceBundle.class)) {
+           rules.add((BitextRule) constructor.newInstance(messages));
+           instantiated = true;
+         } else if (paramTypes.length == 2
+             && paramTypes[0].equals(ResourceBundle.class)
+             && paramTypes[1].equals(Language.class)) {
+           rules.add((BitextRule) constructor.newInstance(messages, language));
+           instantiated = true;
+         }
+         if (instantiated) {
+           // one instance per rule class is enough
+           break;
+         }
+       }
+       // only complain once every constructor has been looked at
+       if (!instantiated && constructors.length > 0) {
+         throw new RuntimeException("Unknown constructor for rule class: "
+             + class1.getName());
+       }
+     }
+   } catch (final Exception e) {
+     throw new RuntimeException("Failed to load rules: " + e.getMessage(), e);
+   }
+   // System.err.println("Loaded " + rules.size() + " rules");
+   return rules;
+ }
+
+
+ /**
+  * Simple rule profiler - used to run LT on a corpus to see which
+  * rule takes most time.
+  * <p>
+  * Every rule is run several times over all sentences of the text; one
+  * tab-separated line per rule is printed to standard output with the median
+  * time of a pass (in milliseconds), the number of sentences, the number of
+  * matches found in a single pass, and the resulting sentences per second.
+  * Sentence analysis happens inside the timed loop, so its cost is included
+  * in the reported time. If the median time is 0 ms the throughput column is
+  * the result of a floating-point division by zero.
+  *
+  * @param contents - text to check
+  * @param lt - instance of LanguageTool
+  * @throws IOException if one of the underlying LanguageTool calls fails with an I/O error
+  */
+ public static void profileRulesOnText(final String contents,
+     final JLanguageTool lt) throws IOException {
+   // number of timed passes per rule; the median of these is reported
+   final int runs = 10;
+   final long[] workTime = new long[runs];
+   final List<Rule> rules = lt.getAllRules();
+   final int ruleCount = rules.size();
+   System.out.printf("Testing %d rules\n", ruleCount);
+   System.out.println("Rule ID\tTime\tSentences\tMatches\tSentences per sec.");
+   final List<String> sentences = lt.sentenceTokenize(contents);
+   for (Rule rule : rules) {
+     int matchCount = 0;
+     for (int k = 0; k < runs; k++) {
+       // count matches per pass so the reported number is not multiplied
+       // by the number of passes
+       int matchesThisRun = 0;
+       final long startTime = System.currentTimeMillis();
+       for (String sentence : sentences) {
+         matchesThisRun += rule.match
+             (lt.getAnalyzedSentence(sentence)).length;
+       }
+       final long endTime = System.currentTimeMillis();
+       workTime[k] = endTime - startTime;
+       matchCount = matchesThisRun;
+     }
+     // median() expects sorted input
+     Arrays.sort(workTime);
+     final long time = median(workTime);
+     final float timeInSeconds = time / 1000.0f;
+     final float sentencesPerSecond = sentences.size() / timeInSeconds;
+     System.out.printf(Locale.ENGLISH,
+         "%s\t%d\t%d\t%d\t%.1f", rule.getId(),
+         time, sentences.size(), matchCount, sentencesPerSecond);
+     System.out.println();
+   }
+ }
+
+ /**
+  * Runs a single rule over every sentence of the given text and counts the
+  * matches.
+  *
+  * @param contents text to check; it is split into sentences by <code>lt</code>
+  * @param lt instance of LanguageTool used for sentence splitting and analysis
+  * @param rule the rule to apply to each analyzed sentence
+  * @return total number of matches the rule produced over all sentences
+  * @throws IOException if one of the underlying LanguageTool calls fails with an I/O error
+  */
+ public static int profileRulesOnLine(final String contents,
+     final JLanguageTool lt, final Rule rule) throws IOException {
+   final List<String> sentences = lt.sentenceTokenize(contents);
+   int totalMatches = 0;
+   for (final String current : sentences) {
+     final AnalyzedSentence analyzed = lt.getAnalyzedSentence(current);
+     totalMatches += rule.match(analyzed).length;
+   }
+   return totalMatches;
+ }
+
+ /**
+  * Returns the median of an array that is already sorted in ascending order.
+  * The array is not sorted here; callers must sort it first.
+  * <p>
+  * For an even number of elements the result is the mean of the two middle
+  * elements, rounded toward zero (the same rounding as integer division).
+  * The mean is computed without adding the two values, so it cannot overflow.
+  *
+  * @param m sorted values, must contain at least one element
+  * @return the median value
+  * @throws IllegalArgumentException if the array is empty
+  */
+ public static long median(long[] m) {
+   if (m.length == 0) {
+     throw new IllegalArgumentException("Cannot compute the median of an empty array");
+   }
+   final int middle = m.length / 2; // subscript of middle element
+   if (m.length % 2 == 1) {
+     // Odd number of elements -- return the middle one.
+     return m[middle];
+   }
+   final long lower = m[middle - 1];
+   final long upper = m[middle];
+   // floor((lower + upper) / 2) without forming the possibly overflowing sum:
+   // shared bits count fully, differing bits count half
+   long mean = (lower & upper) + ((lower ^ upper) >> 1);
+   // integer division rounds toward zero, floor rounds down; they differ
+   // only when the exact sum is negative and odd
+   if (mean < 0 && ((lower ^ upper) & 1L) != 0) {
+     mean++;
+   }
+   return mean;
+ }
+
+ /**
+  * Automatically applies suggestions to the text.
+  * Note: if there is more than one suggestion for a match, the first
+  * one is always applied and the others are silently ignored.
+  * When the check yields no matches the input string is returned as is.
+  *
+  * @param contents String to be corrected
+  * @param lt Initialized LanguageTool object
+  * @return Corrected text as String.
+  * @throws IOException if checking the text fails with an I/O error
+  */
+ public static String correctText(final String contents, final JLanguageTool lt) throws IOException {
+   final List<RuleMatch> found = lt.check(contents);
+   return found.isEmpty() ? contents : correctTextFromMatches(contents, found);
+ }
+
+ /**
+  * Automatically applies suggestions to the bilingual text.
+  * Note: if there is more than one suggestion, always the first
+  * one is applied, and others ignored silently.
+  * <p>
+  * For every source/target pair delivered by the reader the target text is
+  * checked and the (possibly corrected) target text is printed as one line
+  * to standard output. Matches are processed pair by pair and not kept
+  * afterwards.
+  *
+  * @param reader a bitext file reader
+  * @param srcLt Initialized source JLanguageTool object
+  * @param trgLt Initialized target JLanguageTool object
+  * @param bRules List of all BitextRules to use
+  * @throws IOException if checking a sentence pair fails with an I/O error
+  */
+ public static void correctBitext(final BitextReader reader,
+     final JLanguageTool srcLt, final JLanguageTool trgLt,
+     final List<BitextRule> bRules) throws IOException {
+   //TODO: implement a bitext writer for XML formats (like XLIFF)
+   for (StringPair srcAndTrg : reader) {
+     final List<RuleMatch> curMatches = checkBitext(
+         srcAndTrg.getSource(), srcAndTrg.getTarget(),
+         srcLt, trgLt, bRules);
+     final List<RuleMatch> fixedMatches = new ArrayList<RuleMatch>();
+     for (RuleMatch thisMatch : curMatches) {
+       fixedMatches.add(
+           trgLt.adjustRuleMatchPos(thisMatch,
+               0, //don't need to adjust at all, we have zero offset related to trg sentence
+               reader.getTargetColumnCount(),
+               reader.getLineCount(),
+               reader.getCurrentLine()));
+     }
+     if (!fixedMatches.isEmpty()) {
+       System.out.println(correctTextFromMatches(srcAndTrg.getTarget(),
+           fixedMatches));
+     } else {
+       // nothing to fix: echo the target text unchanged
+       System.out.println(srcAndTrg.getTarget());
+     }
+   }
+ }
+
+ /**
+  * Applies the first suggested replacement of every match to the text.
+  * Matches without suggestions are skipped. Match positions refer to the
+  * original text; a running shift compensates for earlier replacements
+  * that changed the length of the text.
+  *
+  * @param contents the text to correct
+  * @param matches the matches whose first suggestions are to be applied
+  * @return the corrected text
+  */
+ private static String correctTextFromMatches(
+     final String contents, final List<RuleMatch> matches) {
+   final StringBuilder corrected = new StringBuilder(contents);
+   // first pass: remember the erroneous text of every correctable match
+   // while the buffer is still untouched
+   final List<String> originals = new ArrayList<String>();
+   for (RuleMatch match : matches) {
+     final List<String> suggestions = match.getSuggestedReplacements();
+     if (suggestions.isEmpty()) {
+       continue;
+     }
+     originals.add(corrected.substring(match.getFromPos(), match.getToPos()));
+   }
+   // second pass: apply the replacements
+   int shift = 0;  // how far later positions have moved to the left so far
+   int index = 0;  // position in 'originals' of the current correctable match
+   for (RuleMatch match : matches) {
+     final List<String> suggestions = match.getSuggestedReplacements();
+     if (suggestions.isEmpty()) {
+       continue;
+     }
+     final int from = match.getFromPos() - shift;
+     final int to = match.getToPos() - shift;
+     //make sure the error hasn't been already corrected:
+     if (originals.get(index).equals(corrected.substring(from, to))) {
+       final String replacement = suggestions.get(0);
+       corrected.replace(from, to, replacement);
+       shift += (match.getToPos() - match.getFromPos())
+           - replacement.length();
+     }
+     index++;
+   }
+   return corrected.toString();
+ }
+
+ /**
+  * Opens an input stream for the given location. The location is first
+  * interpreted as a URL; if it is not a well-formed URL it is treated as a
+  * file system path.
+  *
+  * @param resourcePath a URL or a file path
+  * @return an open stream; the caller is responsible for closing it
+  * @throws IOException if the URL cannot be opened, or if the path does not
+  *         denote an existing, readable regular file
+  */
+ public static InputStream getInputStream(final String resourcePath) throws IOException {
+   try {
+     // a well-formed URL wins
+     return new URL(resourcePath).openStream();
+   } catch (final MalformedURLException ignored) {
+     // not a URL - fall through and try it as a file path
+   }
+   final File file = new File(resourcePath);
+   final boolean readableFile = file.exists() && file.isFile() && file.canRead();
+   if (!readableFile) {
+     throw new IOException(
+         "Could not open input stream from URL/resource/file: "
+         + file.getAbsolutePath());
+   }
+   return new BufferedInputStream(new FileInputStream(file));
+ }
+
+ /**
+  * Get a stacktrace as a string.
+  *
+  * @param e the throwable whose stack trace is wanted
+  * @return the text that <code>e.printStackTrace()</code> would print
+  */
+ public static String getFullStackTrace(final Throwable e) {
+   final StringWriter buffer = new StringWriter();
+   e.printStackTrace(new PrintWriter(buffer));
+   return buffer.toString();
+ }
+
+ /**
+  * Load a file from the classpath using getResourceAsStream().
+  *
+  * @param filename name of the resource, resolved relative to this class
+  * @return the stream of the file
+  * @throws IOException
+  *           if the file cannot be loaded
+  */
+ public static InputStream getStream(final String filename) throws IOException {
+   // Resolve via Tools.class directly: variants such as
+   // "Tools.class.getClass().getResourceAsStream(filename)"
+   // don't work in a web context (using Grails).
+   final InputStream stream = Tools.class.getResourceAsStream(filename);
+   if (stream != null) {
+     return stream;
+   }
+   throw new IOException("Could not load file from classpath : " + filename);
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java
new file mode 100644
index 0000000..d7c2bfc
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java
@@ -0,0 +1,127 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tools;
+
+import java.util.ArrayList;
+import java.util.EmptyStackException;
+
+/**
+ * An unsynchronized stack. Unlike the default {@code java.util.Stack} this
+ * one is backed by {@code ArrayList}; the top of the stack is the last
+ * element of the list. Usage is the same as for {@code java.util.Stack}.
+ *
+ * @author Marcin Miłkowski.
+ *
+ */
+
+public class UnsyncStack<E> extends ArrayList<E> {
+  /**
+   * Generated automatically.
+   */
+  private static final long serialVersionUID = -4984830372178073605L;
+
+  /**
+   * Creates an empty stack.
+   */
+  public UnsyncStack() {
+    super();
+  }
+
+  /**
+   * Puts an item on top of this stack. Equivalent to {@code add(item)}.
+   *
+   * @param item
+   *          the item to be pushed onto this stack.
+   * @return the {@code item} argument.
+   * @see java.util.ArrayList#add
+   */
+  public E push(E item) {
+    add(item);
+    return item;
+  }
+
+  /**
+   * Takes the top item off this stack and returns it.
+   *
+   * @return The object at the top of this stack (the last item of the
+   *         {@code ArrayList} object).
+   * @exception EmptyStackException
+   *              if this stack is empty.
+   */
+  public E pop() {
+    // peek() performs the emptiness check
+    final E top = peek();
+    remove(size() - 1);
+    return top;
+  }
+
+  /**
+   * Returns the top item of this stack and leaves the stack unchanged.
+   *
+   * @return the object at the top of this stack (the last item of the
+   *         {@code ArrayList} object).
+   * @exception EmptyStackException
+   *              if this stack is empty.
+   */
+  public E peek() {
+    final int count = size();
+    if (count == 0) {
+      throw new EmptyStackException();
+    }
+    return get(count - 1);
+  }
+
+  /**
+   * Tests if this stack is empty.
+   *
+   * @return {@code true} if and only if this stack contains no items;
+   *         {@code false} otherwise.
+   */
+  public boolean empty() {
+    return size() == 0;
+  }
+
+  /**
+   * Returns the 1-based distance of an object from the top of this stack.
+   * Of several equal items the one nearest the top is reported; the topmost
+   * item is at distance {@code 1}. Items are compared to {@code o} with the
+   * {@code equals} method.
+   *
+   * @param o
+   *          the desired object.
+   * @return the 1-based position from the top of the stack where the object is
+   *         located; the return value {@code -1} indicates that the object
+   *         is not on the stack.
+   */
+  public int search(Object o) {
+    final int index = lastIndexOf(o);
+    return index < 0 ? -1 : size() - index;
+  }
+
+}