19 files changed, 1962 insertions, 0 deletions
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/CheckBNC.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/CheckBNC.java
new file mode 100644
index 0000000..24931e6
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/CheckBNC.java
@@ -0,0 +1,105 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.dev;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.List;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TextFilter;
+import de.danielnaber.languagetool.tokenizers.SentenceTokenizer;
+import de.danielnaber.languagetool.tools.StringTools;
+import de.danielnaber.languagetool.tools.Tools;
+
+/**
+ * Runs JLanguageTool recursively on the files of the BNC (British National Corpus).
+ * Each file is read, passed through {@link BNCTextFilter} and then checked with
+ * an English {@link JLanguageTool} instance.
+ * 
+ * @author Daniel Naber
+ */
+public final class CheckBNC {
+
+  /** If true, every file is split into sentences which are checked one by one. */
+  static final boolean CHECK_BY_SENTENCE = true;
+
+  private final JLanguageTool langTool;
+  private final TextFilter textFilter = new BNCTextFilter();
+
+  /**
+   * Command line entry point.
+   * 
+   * @param args exactly one element: the file or directory to check
+   */
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      System.out.println("Usage: CheckBNC <directory>");
+      System.exit(1);
+    }
+    final CheckBNC prg = new CheckBNC();
+    prg.run(new File(args[0]));
+  }
+
+  /**
+   * Creates the English checker, activates the default pattern rules and
+   * disables a fixed list of rules, printing their ids to stderr.
+   */
+  private CheckBNC() throws IOException {
+    langTool = new JLanguageTool(Language.ENGLISH);
+    langTool.activateDefaultPatternRules();
+    final String[] disRules = new String[] {"UPPERCASE_SENTENCE_START", "COMMA_PARENTHESIS_WHITESPACE",
+        "WORD_REPEAT_RULE", "DOUBLE_PUNCTUATION"};
+    System.err.println("Note: disabling the following rules:");
+    for (String disRule : disRules) {
+      langTool.disableRule(disRule);
+      System.err.println(" " + disRule);
+    }
+  }
+
+  /**
+   * Checks a single file, or recursively all files below a directory.
+   * 
+   * @param file the file or directory to process
+   * @throws IOException if a file cannot be read or checked
+   */
+  private void run(final File file) throws IOException {
+    if (file.isDirectory()) {
+      final File[] files = file.listFiles();
+      if (files == null) {
+        // listFiles() returns null on I/O errors, e.g. for an unreadable directory
+        System.err.println("Cannot list directory, skipping: " + file.getAbsolutePath());
+        return;
+      }
+      for (File child : files) {
+        run(child);
+      }
+    } else {
+      System.out.println("Checking " + file.getAbsolutePath());
+      final String text = textFilter.filter(readText(file));
+      if (CHECK_BY_SENTENCE) {
+        final SentenceTokenizer st = new SentenceTokenizer();
+        final List<String> sentences = st.tokenize(text);
+        for (String sentence : sentences) {
+          Tools.checkText(sentence, langTool, false, 1000);
+        }
+      } else {
+        Tools.checkText(text, langTool);
+      }
+    }
+  }
+
+  /**
+   * Reads the complete content of a file and always releases the file handle,
+   * which matters when walking over a whole corpus.
+   */
+  private String readText(final File file) throws IOException {
+    final FileInputStream in = new FileInputStream(file.getAbsolutePath());
+    try {
+      return StringTools.readFile(in);
+    } finally {
+      in.close();
+    }
+  }
+
+}
+
+/**
+ * Strips the markup of a BNC file so that only plain text is left.
+ */
+class BNCTextFilter implements TextFilter {
+
+  /**
+   * Removes the header element and all tags, collapses runs of spaces and
+   * replaces the entities handled below by plain characters.
+   * 
+   * @param text raw content of a corpus file
+   * @return the text without markup
+   */
+  public String filter(String text) {
+    text = text.replaceAll("(?s)<header.*?>.*?</header>", "");
+    text = text.replaceAll("<w.*?>", "");
+    text = text.replaceAll("<c.*?>", "");
+    text = text.replaceAll("<.*?>", "");
+    text = text.replaceAll(" +", " ");
+    // the semicolon is optional, but if present it belongs to the entity and is
+    // removed with it so that no stray ';' is left behind the quote
+    text = text.replaceAll("&bquo;?|&equo;?", "\"");
+    text = text.replaceAll("&mdash;?", "--");
+    // decode the ampersand last so its result is not taken for the start of another entity
+    text = text.replaceAll("&amp;?", "&");
+    return text;
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/ContextFinder.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/ContextFinder.java
new file mode 100644
index 0000000..0154b33
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/ContextFinder.java
@@ -0,0 +1,122 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+
+/**
+ * Compare the one-word right or left context of two words. This is useful
+ * to find potential rules for similar words, i.e. contexts that are typical
+ * for only one of the words.  
+ * 
+ * @author Daniel Naber
+ */
+public class ContextFinder {
+
+  private ContextFinder() {}
+
+  public static void main(String[] args) throws IOException {
+    if (args.length != 4 || !args[3].startsWith("--context")) {
+      printUsageAndExit();
+    }
+    final ContextFinder prg = new ContextFinder();
+    if (args[3].endsWith("=right"))
+      prg.run(args[0], args[1], args[2], true);
+    else if (args[3].endsWith("=left"))
+      prg.run(args[0], args[1], args[2], false);
+    else
+      printUsageAndExit();
+  }
+  
+  private static void printUsageAndExit() {
+    System.err.println("Usage: ContextFinder <indexDir> <term1> <term2> --context=right|left"); 
+    System.exit(1);
+  }
+  
+  private void run(String indexDir, String term1, String term2, boolean rightContext) throws IOException {
+    final IndexReader reader = IndexReader.open(indexDir);
+    final IndexSearcher searcher = new IndexSearcher(reader);
+    final TermEnum termEnum = reader.terms();
+    int termCount = 0;
+    System.out.println(term1 + ": " + reader.docFreq(new Term(Indexer.BODY_FIELD, term1)) + "x");
+    System.out.println(term2 + ": " + reader.docFreq(new Term(Indexer.BODY_FIELD, term2)) + "x");
+    while (termEnum.next()) {
+      final Term t = termEnum.term();
+      if (isPOSTag(t))
+        continue;
+      // first term:
+      final PhraseQuery pq1 = makeQuery(t, term1, rightContext);
+      final int hits1 = search(pq1, searcher);
+      // second term:
+      final PhraseQuery pq2 = makeQuery(t, term2, rightContext);
+      final int hits2 = search(pq2, searcher);
+      final float rel = (float)(hits1+1) / (float)(hits2+1);
+      if (rel > 1.0f)
+        System.out.println("#1: " + rel + ": " + myToString(pq1) + ": " + hits1 + " <-> " + myToString(pq2) + ": " + hits2);
+      else if (rel < 1.0f)
+        System.out.println("#2: " + rel + ": " + myToString(pq1) + ": " + hits1 + " <-> " + myToString(pq2) + ": " + hits2);
+      termCount++;
+    }
+    System.out.println("termCount = " + termCount);
+    searcher.close();
+    reader.close();
+  }
+
+  private String myToString(PhraseQuery pq) {
+    return pq.toString().replaceAll("body:", "");
+  }
+
+  private PhraseQuery makeQuery(Term t, String term1, boolean rightContext) {
+    final PhraseQuery pq = new PhraseQuery();
+    if (rightContext) {
+      pq.add(new Term(Indexer.BODY_FIELD, term1));
+      pq.add(new Term(Indexer.BODY_FIELD, t.text()));
+    } else {
+      pq.add(new Term(Indexer.BODY_FIELD, t.text()));
+      pq.add(new Term(Indexer.BODY_FIELD, term1));
+    }
+    return pq;
+  }
+
+  private int search(PhraseQuery pq, IndexSearcher searcher) throws IOException {
+    //long time = System.currentTimeMillis();
+    final Hits h = searcher.search(pq);
+    //long searchTime = System.currentTimeMillis()-time;
+    if (h.length() > 0) {
+      //System.err.println(h.length() + " " + pq);
+      //System.err.println("  " + searchTime + "ms");
+    }
+    return h.length();
+  }
+
+  private boolean isPOSTag(Term t) {
+    if (t.text().equals(t.text().toUpperCase())) {    // e.g. "VER:1:PLU:KJ2:NON:NEB"
+      return true;
+    }
+    return false;
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/ExportGermanNouns.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/ExportGermanNouns.java
new file mode 100644
index 0000000..ad7d231
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/ExportGermanNouns.java
@@ -0,0 +1,86 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+/*
+ * Created on 06.05.2007
+ */
+package de.danielnaber.languagetool.dev;
+
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Set;
+
+import de.danielnaber.languagetool.JLanguageTool;
+
+import morfologik.fsa.FSA;
+
+/**
+ * Export German nouns as a serialized Java HashSet, to be used
+ * by jWordSplitter.
+ * 
+ * @author Daniel Naber
+ */
+public class ExportGermanNouns {
+
+  private static final String DICT_FILENAME = "/de/german.dict";
+
+  private ExportGermanNouns() {
+  }
+
+  /**
+   * Collects the lower-cased word form of every dictionary entry that is
+   * tagged "+SUB:" and does not contain ":ADJ".
+   * 
+   * @return the set of matching word forms
+   * @throws IOException if the dictionary cannot be read
+   */
+  private Set<String> getWords() throws IOException {
+    final InputStream dictStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(DICT_FILENAME);
+    try {
+      final FSA fsa = FSA.getInstance(dictStream);
+      final Set<String> set = new HashSet<String>();
+      for (ByteBuffer bb : fsa) {
+        final byte [] sequence = new byte [bb.remaining()];
+        bb.get(sequence);
+        final String output = new String(sequence, "iso-8859-1");
+        if (output.indexOf("+SUB:") != -1 && output.indexOf(":ADJ") == -1) {
+          // the word form is everything before the first '+'; the Set takes
+          // care of entries that occur with more than one reading
+          final String[] parts = output.split("\\+");
+          set.add(parts[0].toLowerCase());
+        }
+      }
+      return set;
+    } finally {
+      if (dictStream != null) {
+        dictStream.close();
+      }
+    }
+  }
+
+  /**
+   * Writes the set to the given file using Java serialization.
+   * 
+   * @param words the words to store
+   * @param outputFile the file to create or overwrite
+   * @throws IOException if the file cannot be written
+   */
+  private void serialize(Set<String> words, File outputFile) throws IOException {
+    final FileOutputStream fos = new FileOutputStream(outputFile);
+    try {
+      final ObjectOutputStream oos = new ObjectOutputStream(fos);
+      oos.writeObject(words);
+      oos.close();
+    } finally {
+      // also releases the file if writing failed before oos could be closed
+      fos.close();
+    }
+  }
+
+  /**
+   * Command line entry point.
+   * 
+   * @param args exactly one element: the name of the output file
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length != 1) {
+      System.out.println("Usage: ExportGermanNouns <outputFile>");
+      System.exit(1);
+    }
+    final ExportGermanNouns prg = new ExportGermanNouns();
+    final Set<String> words = prg.getWords();
+    prg.serialize(words, new File(args[0]));
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/Indexer.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/Indexer.java
new file mode 100644
index 0000000..fc3392b
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/Indexer.java
@@ -0,0 +1,100 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * POS tag and index text files using Lucene. Required for ContextFinder.java.
+ * TODO: hard-coded to index a specific kind of XML.
+ * 
+ * @author Daniel Naber
+ */
+public class Indexer  {
+
+  static final String BODY_FIELD = "body";
+
+  private static final Pattern PARAGRAPH = Pattern.compile("<p>(.*?)</p>", Pattern.DOTALL);
+
+  /** Number of files indexed so far; only used for the progress output. */
+  private int fileCount = 0;
+
+  private Indexer() {}
+
+  /**
+   * Command line entry point.
+   * 
+   * @param args the directory with the data files and the directory for the index
+   * @throws IOException if reading the data or writing the index fails
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length != 2) {
+      System.err.println("Usage: Indexer <dataDir> <indexDir>");
+      System.exit(1);
+    }
+    final Indexer prg = new Indexer();
+    // FIXME: make this an option:
+    final Language lang = Language.GERMAN;
+    prg.run(args[0], args[1], lang);
+  }
+
+  /**
+   * Creates a new index in <code>indexDir</code> from all files below
+   * <code>dataDir</code>. The index writer is closed even if indexing fails.
+   */
+  private void run(String dataDir, String indexDir, Language lang) throws IOException {
+    final IndexWriter iw = new IndexWriter(indexDir, new POSTagAnalyzer(lang.getTagger()), true);
+    try {
+      iw.setMaxBufferedDocs(100);
+      index(iw, new File(dataDir));
+      System.out.println("Optimizing index...");
+      iw.optimize();
+    } finally {
+      iw.close();
+    }
+    System.out.println("Done.");
+  }
+
+  /**
+   * Adds one file to the index, or recursively all files below a directory.
+   * 
+   * @param iw the writer to add documents to
+   * @param fileOrDir the file or directory to index
+   */
+  private void index(IndexWriter iw, File fileOrDir) throws IOException {
+    if (fileOrDir.isDirectory()) {
+      final File[] files = fileOrDir.listFiles();
+      if (files == null) {
+        // listFiles() returns null on I/O errors, e.g. for an unreadable directory
+        System.err.println("Cannot list directory, skipping: " + fileOrDir.getAbsolutePath());
+        return;
+      }
+      for (File file : files) {
+        index(iw, file);
+      }
+    } else {
+      // counted in a field so the numbering continues across sub directories
+      fileCount++;
+      if (fileCount % 50 == 0) {
+        System.out.println("Indexing file #" + fileCount);
+      }
+      final String xml;
+      final FileInputStream in = new FileInputStream(fileOrDir.getAbsolutePath());
+      try {
+        xml = StringTools.readFile(in, "iso-8859-1");
+      } finally {
+        in.close();
+      }
+      final Document doc = new Document();
+      doc.add(new Field(BODY_FIELD, getParagraphs(xml), Field.Store.YES, Field.Index.TOKENIZED));
+      iw.addDocument(doc);
+    }
+  }
+
+  /**
+   * Extracts the content of all <code>&lt;p&gt;</code> elements.
+   * 
+   * @param xml the content of a data file
+   * @return the paragraph contents, each followed by a line break
+   */
+  private String getParagraphs(String xml) {
+    final StringBuilder sb = new StringBuilder();
+    final Matcher matcher = PARAGRAPH.matcher(xml);
+    while (matcher.find()) {
+      sb.append(matcher.group(1));
+      // keep the last word of a paragraph apart from the first word of the next one
+      sb.append('\n');
+    }
+    return sb.toString();
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagAnalyzer.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagAnalyzer.java
new file mode 100644
index 0000000..06d6cd4
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagAnalyzer.java
@@ -0,0 +1,49 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+import de.danielnaber.languagetool.tagging.Tagger;
+
+/**
+ * Lucene analyzer that tokenizes text and enriches the tokens with their
+ * POS analysis by means of {@link POSTagFilter}.
+ * 
+ * @author Daniel Naber
+ */
+class POSTagAnalyzer extends Analyzer {
+
+  private final Tagger tagger;
+
+  /**
+   * @param tagger the tagger that is handed to the {@link POSTagFilter}
+   */
+  public POSTagAnalyzer(Tagger tagger) {
+    this.tagger = tagger;
+  }
+
+  /**
+   * Builds the token stream: a {@link StandardTokenizer} on the reader,
+   * wrapped in a {@link POSTagFilter}. The field name is not used.
+   */
+  public TokenStream tokenStream(@SuppressWarnings("unused")String fieldName, Reader reader) {
+    final TokenStream words = new StandardTokenizer(reader);
+    return new POSTagFilter(words, tagger);
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagFilter.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagFilter.java
new file mode 100644
index 0000000..01fc600
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagFilter.java
@@ -0,0 +1,94 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.tagging.Tagger;
+import de.danielnaber.languagetool.tagging.de.AnalyzedGermanTokenReadings;
+
+/**
+ * Filter that puts the words of a text, the base form, and word's POS tags at the 
+ * same index position. The word itself is emitted lower-cased with the prefix
+ * "T_", base forms get the prefix "B_", POS tags are emitted unchanged.
+ * 
+ * @author Daniel Naber
+ */
+class POSTagFilter extends TokenFilter {
+
+  private static final String BASEFORM_PREFIX = "B_";
+  private static final String TEXTFORM_PREFIX = "T_";
+
+  /** Tokens that still have to be returned for the most recently read word. */
+  private final Stack<Token> stack = new Stack<Token>();
+  private final Tagger tagger;
+
+  /**
+   * @param in the stream of words to analyze
+   * @param tagger the tagger used to look up POS tags and base forms
+   */
+  public POSTagFilter(TokenStream in, Tagger tagger) {
+    super(in);
+    this.tagger = tagger;
+  }
+
+  /**
+   * Returns the pending POS tag and base form tokens of the previous word, if
+   * there are any, otherwise reads and analyzes the next word.
+   * 
+   * @return the next token or <code>null</code> at the end of the input
+   */
+  public final org.apache.lucene.analysis.Token next() throws java.io.IOException {
+    if (!stack.isEmpty()) {
+      return stack.pop();
+    }
+    final Token t = input.next();
+    if (t == null) {
+      return null;
+    }
+    final List<String> wordList = new ArrayList<String>();
+    wordList.add(t.termText());
+    final List<AnalyzedTokenReadings> atr = tagger.tag(wordList);
+    // one set per word, so that a base form shared by several readings is indexed only once
+    final Set<String> indexLemmas = new HashSet<String>();
+    for (Object anAtr : atr) {
+      // NOTE(review): the cast limits this filter to the German tagger - confirm that is intended
+      final AnalyzedGermanTokenReadings atrs = (AnalyzedGermanTokenReadings) anAtr;
+      final List<AnalyzedToken> ats = atrs.getReadings();
+      for (Object at1 : ats) {
+        final AnalyzedToken at = (AnalyzedToken) at1;
+        if (at.getPOSTag() != null) {
+          pushAtSamePosition(at.getPOSTag(), t);
+        }
+        if (at.getLemma() != null) {
+          final String lemma = at.getLemma().toLowerCase();
+          // a base form that only repeats the word itself adds no information
+          if (!lemma.equalsIgnoreCase(t.termText()) && indexLemmas.add(lemma)) {
+            pushAtSamePosition(BASEFORM_PREFIX + lemma, t);
+          }
+        }
+      }
+    }
+    return new Token(TEXTFORM_PREFIX + t.termText().toLowerCase(), t.startOffset(), t.endOffset());
+  }
+
+  /**
+   * Queues a token with the offsets of <code>word</code> and a position
+   * increment of 0, i.e. at the same index position as the word.
+   */
+  private void pushAtSamePosition(String termText, Token word) {
+    final Token extra = new Token(termText, word.startOffset(), word.endOffset());
+    extra.setPositionIncrement(0);
+    stack.push(extra);
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagLanguageModel.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagLanguageModel.java
new file mode 100644
index 0000000..68439cc
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/POSTagLanguageModel.java
@@ -0,0 +1,147 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.dev;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Tag text and display only POS tags to create an n-gram language model.
+ * 
+ * @author Marcin Milkowski
+ */
+public class POSTagLanguageModel {
+
+  /** Number of buffered characters after which the text read so far is tagged. */
+  private static final int MAX_FILE_SIZE = 64000;
+
+  /**
+   * Reads text from standard input and prints the POS tags of each sentence
+   * to standard output.
+   * 
+   * @param args exactly one element: the short name of the language to use
+   * @throws IOException if reading or analyzing the text fails
+   */
+  public static void main(final String[] args) throws IOException {
+    if (args.length == 1) {
+      final Language language = getLanguageOrExit(args[0]);
+      final JLanguageTool lt = new JLanguageTool(language, null);
+      runOnStdIn(lt);
+    } else {
+      exitWithUsageMessage();
+    }
+  }
+
+  /**
+   * Looks up a language by its short name. If there is no such language, the
+   * supported languages and the usage message are printed and the program ends.
+   */
+  private static Language getLanguageOrExit(final String lang) {
+    final List<String> supportedLanguages = new ArrayList<String>();
+    for (final Language tmpLang : Language.LANGUAGES) {
+      if (lang.equals(tmpLang.getShortName())) {
+        return tmpLang;
+      }
+      supportedLanguages.add(tmpLang.getShortName());
+    }
+    System.out.println("Unknown language '" + lang
+        + "'. Supported languages are: " + supportedLanguages);
+    exitWithUsageMessage();
+    return null;  // not reached, exitWithUsageMessage() ends the program
+  }
+
+  /** Prints the usage message and ends the program with exit code 1. */
+  private static void exitWithUsageMessage() {
+    System.out
+        .println("Usage: java de.danielnaber.languagetool.dev.POSTagLanguageModel language");
+    System.exit(1);
+  }
+
+  /**
+   * Reads standard input line by line and tags it in chunks: after every line
+   * if the language treats single line breaks as paragraph ends, otherwise
+   * after an empty line or once {@link #MAX_FILE_SIZE} characters are buffered.
+   * The reader is closed even if tagging fails.
+   */
+  private static void runOnStdIn(final JLanguageTool lt) throws IOException {
+    final BufferedReader br = new BufferedReader(
+        new InputStreamReader(new BufferedInputStream(System.in)));
+    try {
+      final boolean lineIsParagraph =
+          lt.getLanguage().getSentenceTokenizer().singleLineBreaksMarksPara();
+      final StringBuilder sb = new StringBuilder();
+      String line;
+      while ((line = br.readLine()) != null) {
+        sb.append(line);
+        sb.append('\n');
+        if (lineIsParagraph || "".equals(line) || sb.length() >= MAX_FILE_SIZE) {
+          tagText(sb.toString(), lt);
+          sb.setLength(0);
+        }
+      }
+      // text after the last chunk boundary
+      if (sb.length() > 0) {
+        tagText(sb.toString(), lt);
+      }
+    } finally {
+      br.close();
+    }
+  }
+
+  /** Splits the text into sentences and prints one line of POS tags per sentence. */
+  private static void tagText(final String contents, final JLanguageTool lt)
+      throws IOException {
+    final List<String> sentences = lt.sentenceTokenize(contents);
+    for (final String sentence : sentences) {
+      final AnalyzedSentence analyzedText = lt.getAnalyzedSentence(sentence);
+      System.out.println(getSentence(analyzedText));
+    }
+  }
+
+  /**
+   * Returns the POS tags of all non-whitespace tokens, each followed by a
+   * space, enclosed in "&lt;S&gt;" and "&lt;/S&gt;".
+   */
+  private static String getSentence(final AnalyzedSentence sent) {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("<S>");
+    for (final AnalyzedTokenReadings atr : sent.getTokensWithoutWhitespace()) {
+      sb.append(getPOS(atr));
+      sb.append(' ');
+    }
+    sb.append("</S>");
+    return sb.toString();
+  }
+
+  /**
+   * Returns the POS tags of all readings of a token joined by '+', or an
+   * empty string for a whitespace token.
+   */
+  private static String getPOS(final AnalyzedTokenReadings atr) {
+    if (atr.isWhitespace()) {
+      return "";
+    }
+    final StringBuilder sb = new StringBuilder();
+    final int readNum = atr.getReadingsLength();
+    for (int i = 0; i < readNum; i++) {
+      if (i > 0) {
+        sb.append('+');
+      }
+      sb.append(atr.getAnalyzedToken(i).getPOSTag());
+    }
+    return sb.toString();
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/PrintLocales.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/PrintLocales.java
new file mode 100644
index 0000000..a946e07
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/PrintLocales.java
@@ -0,0 +1,94 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Properties;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Used for creating ooolocales.properties file that defines a property that is
+ * needed to build Linguistic.xcu. Run internally by the ant build.
+ * 
+ * @author Marcin Miłkowski
+ */
+public final class PrintLocales {
+
+  static final String FILENAME = "ooolocales.properties";
+
+  private static final String PROPERTY_NAME = "countryvariants";
+
+  public static void main(final String[] args) throws IOException {
+    final PrintLocales prg = new PrintLocales();
+    prg.run();
+  }
+
+  /**
+   * Computes the current list of locales and rewrites the properties file only
+   * if the list differs from the stored one, so an unchanged file is not touched.
+   */
+  private void run() throws IOException {
+    final String locales = buildLocales();
+    final String oldLocales = loadProperties().getProperty(PROPERTY_NAME);
+    if (!locales.equals(oldLocales)) {
+      saveLocales(locales);
+    }
+  }
+
+  /**
+   * Returns the space-separated locales of all languages except the demo
+   * language: the short name, followed by "-" and the country variant
+   * if the variant is not empty.
+   */
+  private String buildLocales() {
+    final StringBuilder locales = new StringBuilder();
+    for (final Language element : Language.LANGUAGES) {
+      if (element.equals(Language.DEMO)) {
+        continue;
+      }
+      for (final String variant : element.getCountryVariants()) {
+        if (locales.length() > 0) {
+          locales.append(' ');
+        }
+        locales.append(element.getShortName());
+        if (!StringTools.isEmpty(variant)) {
+          locales.append('-').append(variant);
+        }
+      }
+    }
+    return locales.toString();
+  }
+
+  /**
+   * Loads the existing properties file.
+   * 
+   * @return the stored properties, empty if the file does not exist yet
+   */
+  private Properties loadProperties() throws IOException {
+    final Properties props = new Properties();
+    FileInputStream fIn;
+    try {
+      fIn = new FileInputStream(FILENAME);
+    } catch (java.io.FileNotFoundException e) {
+      // no file yet: report no stored locales so that the file gets created
+      return props;
+    }
+    try {
+      props.load(fIn);
+    } finally {
+      fIn.close();
+    }
+    return props;
+  }
+
+  /**
+   * Writes the locales to the properties file. If the file cannot be opened
+   * for writing, an error is printed and the program ends with exit code 1.
+   */
+  private void saveLocales(final String locales) throws IOException {
+    final Properties propLoc = new Properties();
+    propLoc.setProperty(PROPERTY_NAME, locales);
+    FileOutputStream fOut = null;
+    try {
+      fOut = new FileOutputStream(FILENAME);
+      propLoc.store(fOut, "Locales");
+    } finally {
+      if (fOut != null) {
+        fOut.close();
+      } else {
+        System.err.println("Cannot save new locales!");
+        System.exit(1);
+      }
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/RuleOverview.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/RuleOverview.java
new file mode 100644
index 0000000..c29b074
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/RuleOverview.java
@@ -0,0 +1,195 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.language.Contributor;
+import de.danielnaber.languagetool.tools.StringTools;
+import de.danielnaber.languagetool.tools.Tools;
+
+/**
+ * Command line tool to list supported languages and their number of rules.
+ * 
+ * @author Daniel Naber
+ */
+public final class RuleOverview {
+
+  /**
+   * Prints the HTML overview table to stdout.
+   *
+   * @param args ignored
+   * @throws IOException if a rule file cannot be read
+   */
+  public static void main(final String[] args) throws IOException {
+    final RuleOverview prg = new RuleOverview();
+    prg.run();
+  }
+  
+  private RuleOverview() {
+    // only instantiated from main()
+  }
+  
+  /**
+   * Writes one table row per language (sorted by language name, without the
+   * demo language) with the number of XML rules, Java rules and false friend
+   * patterns plus the rule maintainers.
+   *
+   * @throws IOException if a rule file cannot be read
+   */
+  private void run() throws IOException {
+    printTableHeader();
+    final List<String> sortedLanguages = new ArrayList<String>();
+    for (Language element : Language.LANGUAGES) {
+      if (element != Language.DEMO) {
+        sortedLanguages.add(element.getName());
+      }
+    }
+    Collections.sort(sortedLanguages);
+
+    // the false friends file is shared by all languages, so read it only once
+    final String falseFriendFile = JLanguageTool.getDataBroker().getRulesDir() + File.separator + "false-friends.xml";
+    final java.net.URL falseFriendUrl = this.getClass().getResource(falseFriendFile);
+    final String falseFriendRules = StringTools.readFile(Tools.getStream(falseFriendFile))
+      .replaceAll("(?s)<!--.*?-->", "")
+      .replaceAll("(?s)<rules.*?>", "");
+
+    for (final String langName : sortedLanguages) {
+      final Language lang = Language.getLanguageForName(langName);
+      System.out.print("<tr>");
+      System.out.print("<td>" + lang.getName() + "</td>");
+
+      // XML rules: rules with attributes ("<rule ") plus plain rules inside rule groups ("<rule>")
+      final String xmlFile = JLanguageTool.getDataBroker().getRulesDir() + File.separator + lang.getShortName() + File.separator + "grammar.xml";
+      final java.net.URL url = this.getClass().getResource(xmlFile);
+      if (url == null) {
+        System.out.println("<td align=\"right\">0</td>");
+      } else {
+        final String xmlRules = StringTools.readFile(Tools.getStream(xmlFile))
+          .replaceAll("(?s)<!--.*?-->", "")
+          .replaceAll("(?s)<rules.*?>", "");
+        final int xmlCount = countOccurrences(xmlRules, "<rule ") + countOccurrences(xmlRules, "<rule>");
+        System.out.print("<td align=\"right\">" + xmlCount + " (" +
+            "<a href=\"http://languagetool.cvs.sourceforge.net/*checkout*/languagetool/" +
+            "JLanguageTool/src/rules/" + lang.getShortName() + "/grammar.xml\">show</a>/" +
+            "<a href=\"http://community.languagetool.org/rule/list?lang=" +
+            lang.getShortName() + "\">browse</a>" +
+            ")</td>");
+      }
+      System.out.print("<td></td>");
+
+      // Java rules:
+      final File dir = new File("src/java/de/danielnaber/languagetool" + 
+          JLanguageTool.getDataBroker().getRulesDir() + "/" + lang.getShortName());
+      if (!dir.exists()) {
+        System.out.print("<td align=\"right\">0</td>");
+      } else {
+        // listFiles() returns null if dir is not a directory or cannot be read
+        final File[] javaRules = dir.listFiles(new JavaFilter());
+        // minus 1: one is always "<Language>Rule.java"; never report a negative count
+        final int javaCount = javaRules == null ? 0 : Math.max(0, javaRules.length - 1);
+        System.out.print("<td align=\"right\">" + javaCount + "</td>");
+      }
+
+      // false friends:
+      System.out.println("<td></td>"); 
+      if (falseFriendUrl == null) {
+        System.out.println("<td align=\"right\">0</td>");
+      } else {
+        final int falseFriendCount =
+            countOccurrences(falseFriendRules, "<pattern lang=\""+ lang.getShortName());
+        System.out.print("<td align=\"right\">" + falseFriendCount + "</td>");
+      }
+
+      // maintainer information: always printed so that every row has all columns
+      System.out.print("<td></td>");
+      System.out.print("<td align=\"left\">" + formatMaintainers(lang) +
+        "</td>");
+      
+      System.out.println("</tr>");    
+    }
+
+    System.out.println("</table>");    
+  }
+
+  /** Prints the page title, the current date and the table's header row. */
+  private static void printTableHeader() {
+    System.out.println("<b>Rules in LanguageTool " + JLanguageTool.VERSION + "</b><br />");
+    System.out.println("Date: " + new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + "<br /><br />\n");
+    System.out.println("<table>");
+    System.out.println("<tr>");
+    System.out.println("  <th></th>");
+    System.out.println("  <th align=\"right\">XML rules</th>");
+    System.out.println("  <th>&nbsp;&nbsp;</th>");
+    System.out.println("  <th align=\"right\">Java rules</th>");
+    System.out.println("  <th>&nbsp;&nbsp;</th>");
+    System.out.println("  <th align=\"right\">" +
+        "<a href=\"http://languagetool.cvs.sourceforge.net/*checkout*/languagetool/" +
+        "JLanguageTool/src/rules/false-friends.xml\">False friends</a></th>");
+    System.out.println("  <th>&nbsp;&nbsp;</th>");
+    System.out.println("  <th align=\"left\">Rule Maintainers</th>");
+    System.out.println("</tr>");
+  }
+
+  /**
+   * Counts the non-overlapping occurrences of <code>needle</code> in
+   * <code>text</code>, including one at index 0.
+   */
+  private static int countOccurrences(final String text, final String needle) {
+    int count = 0;
+    int pos = text.indexOf(needle);
+    while (pos != -1) {
+      count++;
+      pos = text.indexOf(needle, pos + needle.length());
+    }
+    return count;
+  }
+
+  /**
+   * Returns the maintainers of <code>lang</code> as comma-separated HTML:
+   * the name (linked if a URL is set), followed by the remark in parentheses
+   * if there is one. Returns an empty string if no maintainers are set.
+   */
+  private static String formatMaintainers(final Language lang) {
+    final StringBuilder maintainerInfo = new StringBuilder();
+    if (lang.getMaintainers() != null) {
+      for (Contributor contributor : lang.getMaintainers()) {
+        if (maintainerInfo.length() > 0) {
+          maintainerInfo.append(", ");
+        }
+        final boolean hasUrl = contributor.getUrl() != null;
+        if (hasUrl) {
+          maintainerInfo.append("<a href=\"");
+          maintainerInfo.append(contributor.getUrl());
+          maintainerInfo.append("\">");
+        }
+        maintainerInfo.append(contributor.getName());
+        if (hasUrl) {
+          maintainerInfo.append("</a>");
+        }
+        if (contributor.getRemark() != null) {
+          maintainerInfo.append("&nbsp;(" + contributor.getRemark() + ")");
+        }
+      }
+    }
+    return maintainerInfo.toString();
+  }
+
+}
+
+/** File filter that accepts exactly those files whose name ends with ".java". */
+class JavaFilter implements FileFilter {
+
+  /**
+   * @param f the file to test
+   * @return true if the file name ends with ".java"
+   */
+  public boolean accept(final File f) {
+    final String fileName = f.getName();
+    return fileName.endsWith(".java");
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/tools/RomanianDiacriticsModifier.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/tools/RomanianDiacriticsModifier.java
new file mode 100644
index 0000000..6fc90bf
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/tools/RomanianDiacriticsModifier.java
@@ -0,0 +1,99 @@
+package de.danielnaber.languagetool.dev.tools;
+
+/**
+ * 
+ * Helper class for Romanian diacritics correction. Many Romanian texts
+ * (including the Romanian Wikipedia) contain wrong diacritics: <b>ş</b> instead of
+ * <b>ș</b> and <b>ţ</b> instead of <b>ț</b>.
+ * 
+ * @author Ionuț Păduraru
+ * @since 14.04.2009 12:27:24
+ */
+public final class RomanianDiacriticsModifier {
+
+	private RomanianDiacriticsModifier() {
+		// static utility class, not meant to be instantiated
+	}
+
+	/**
+	 * Single character correction: maps the cedilla variants of s and t to
+	 * the comma-below variants and returns every other character unchanged.
+	 * Unicode escapes are used so that the result does not depend on the
+	 * encoding the source file is compiled with.
+	 * 
+	 * @param c the character to correct
+	 * @return the corrected character
+	 */
+	private static char diac(final char c) {
+		switch (c) {
+		case '\u015F': // small s with cedilla
+			return '\u0219'; // small s with comma below
+		case '\u0163': // small t with cedilla
+			return '\u021B'; // small t with comma below
+		case '\u0162': // capital T with cedilla
+			return '\u021A'; // capital T with comma below
+		case '\u015E': // capital S with cedilla
+			return '\u0218'; // capital S with comma below
+		default:
+			return c;
+		}
+	}
+
+	/**
+	 * Romanian diacritics correction: replaces s and t with cedilla by s and
+	 * t with comma below (including the upper-case variants). <br/>
+	 * Thread-safe: works on a private copy of the characters only, so no
+	 * shared buffer or lookup table is needed. All other characters, up to
+	 * and including U+FFFF, are passed through unchanged.
+	 * 
+	 * @param s the text to correct, may be <code>null</code>
+	 * @return the corrected text, or <code>null</code> if <code>s</code> is
+	 *         <code>null</code>
+	 */
+	public static String correctDiacritrics(String s) {
+		if (null == s) {
+			return null;
+		}
+		final char[] chars = s.toCharArray();
+		for (int i = 0; i < chars.length; i++) {
+			chars[i] = diac(chars[i]);
+		}
+		return new String(chars);
+	}
+
+}
+\ No newline at end of file
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/BaseWikipediaDumpHandler.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/BaseWikipediaDumpHandler.java
new file mode 100644
index 0000000..589c0e2
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/BaseWikipediaDumpHandler.java
@@ -0,0 +1,155 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2010 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev.wikipedia;
+
+import java.io.IOException;
+import java.util.Date;
+import java.util.List;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TextFilter;
+import de.danielnaber.languagetool.dev.tools.RomanianDiacriticsModifier;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Read the Wikipedia XML dump, check texts with LanguageTool, and
+ * let result be handled in sub classes.
+ */
+abstract class BaseWikipediaDumpHandler extends DefaultHandler {
+
+  protected static final int CONTEXT_SIZE = 50; 
+  protected static final String MARKER_START = "<err>";
+  protected static final String MARKER_END = "</err>";
+  protected static final String LANG_MARKER = "XX";
+  protected static final String URL_PREFIX = "http://" + LANG_MARKER + ".wikipedia.org/wiki/";
+
+  protected Date dumpDate;
+  protected String langCode;
+
+  private final JLanguageTool languageTool;
+  private final TextFilter textFilter;
+  /** Maximum number of articles to check, 0 or less means no limit. */
+  private final int maxArticles;
+
+  private int ruleMatchCount = 0;
+  private int articleCount = 0;
+
+  /** True while the parser is inside a <code>title</code> or <code>text</code> element. */
+  private boolean inText = false;
+  private StringBuilder text = new StringBuilder();
+  private String title;
+
+  /**
+   * @param languageTool the checker used for every article
+   * @param maxArticles stop (and exit the JVM) after this many articles, 0 for no limit
+   * @param dumpDate date of the Wikipedia dump
+   * @param langCode language code used to build article URLs
+   * @param lang language of the dump, selects the text filter
+   */
+  protected BaseWikipediaDumpHandler(JLanguageTool languageTool, int maxArticles, Date dumpDate,
+      String langCode, Language lang) {
+    this.languageTool = languageTool;
+    this.maxArticles = maxArticles;
+    this.dumpDate = dumpDate;
+    this.langCode = langCode;
+    this.textFilter = createTextFilter(lang);
+  }
+  
+  /**
+   * Creates the filter that turns Wikipedia markup into plain text. For
+   * Romanian the filtered text additionally gets its diacritics corrected.
+   */
+  private static TextFilter createTextFilter(final Language lang) {
+    if (Language.ROMANIAN == lang) {
+      return new WikipediaTextFilter() {
+        @Override
+        public String filter(String arg0) {
+          final String tmp = super.filter(arg0);
+          // diacritics correction (comma-below instead of cedilla for s and t)
+          return RomanianDiacriticsModifier.correctDiacritrics(tmp);
+        }
+      };
+    }
+    return new WikipediaTextFilter();
+  }
+
+  //===========================================================
+  // SAX DocumentHandler methods
+  //===========================================================
+
+  /** Starts collecting character data when a title or text element opens. */
+  @Override
+  public void startElement(String namespaceURI, String lName, String qName,
+      Attributes attrs) throws SAXException {
+    if (qName.equals("title") || qName.equals("text")) {
+      inText = true;
+    }
+  }
+
+  /**
+   * Remembers the title at the end of a title element; at the end of a text
+   * element filters the collected text and checks it unless it is a redirect.
+   */
+  @Override
+  public void endElement(String namespaceURI, String sName, String qName) {
+    if (qName.equals("title")) {
+      title = text.toString();
+      text = new StringBuilder();
+    } else if (qName.equals("text")) {
+      final String textToCheck = textFilter.filter(text.toString());
+      if (!textToCheck.contains("#REDIRECT")) {
+        checkArticle(textToCheck);
+      }
+      text = new StringBuilder();
+    }
+    inText = false;
+  }
+
+  /** Appends character data to the current buffer while inside title or text. */
+  @Override
+  public void characters(char buf[], int offset, int len) {
+    if (inText) {
+      text.append(buf, offset, len);
+    }
+  }
+
+  /**
+   * Checks one article and passes the matches to {@link #handleResult}. If
+   * the article limit has already been reached, prints a summary, releases
+   * the handler's resources via {@link #close()} and exits the JVM.
+   */
+  private void checkArticle(final String textToCheck) {
+    if (maxArticles > 0 && articleCount >= maxArticles) {
+      // articleCount is the number of articles that were actually checked
+      System.out.printf("Maximum number of articles reached. Found %d matches in %d articles\n",
+          ruleMatchCount, articleCount);
+      // System.exit() skips the callers' finally blocks, so clean up here
+      close();
+      System.exit(0);
+    }
+    articleCount++;
+    final List<RuleMatch> ruleMatches;
+    try {
+      ruleMatches = languageTool.check(textToCheck);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    System.out.println("Checking article " + articleCount + " (" +
+        textToCheck.length()/1024 + "KB, '" + title + "')" + 
+        ", found " + ruleMatches.size() + " matches");
+    try {
+      handleResult(title, ruleMatches, textToCheck, languageTool.getLanguage());
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+    ruleMatchCount += ruleMatches.size();
+  }
+  
+  /**
+   * Called once per checked article with all matches found in it.
+   *
+   * @param title the article's title
+   * @param ruleMatches the matches found, may be empty
+   * @param text the filtered article text the match positions refer to
+   * @param language the language of the checker
+   * @throws Exception on any error, wrapped into a RuntimeException by the caller
+   */
+  abstract protected void handleResult(String title, List<RuleMatch> ruleMatches,
+      String text, Language language) throws Exception;
+
+  /** Releases resources held by the handler. */
+  abstract protected void close();
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/CheckWikipediaDump.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/CheckWikipediaDump.java
new file mode 100644
index 0000000..3eabdd8
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/CheckWikipediaDump.java
@@ -0,0 +1,143 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+/*
+ *
+ * Created on 21.12.2006
+ */
+package de.danielnaber.languagetool.dev.wikipedia;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+
+/**
+ * Command-line tool that checks texts from Wikipedia (download "pages-articles.xml.bz2" from
+ * http://download.wikimedia.org/backup-index.html, e.g.
+ * http://download.wikimedia.org/dewiki/latest/dewiki-latest-pages-articles.xml.bz2)
+ * and stores the result in a database, or prints it to stdout if no property file is given.
+ * 
+ * @author Daniel Naber
+ */
+public class CheckWikipediaDump {
+
+  private CheckWikipediaDump() {
+    // no public constructor
+  }
+  
+  /**
+   * Parses the command line and starts the check, see the usage message
+   * for the expected arguments.
+   *
+   * @throws IOException if the property file or the dump cannot be read
+   */
+  public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException {
+    final CheckWikipediaDump prg = new CheckWikipediaDump();
+    if (args.length < 3 || args.length > 4) {
+      printUsageAndExit();
+    }
+    int maxArticles = 0;
+    if (args.length == 4) {
+      try {
+        maxArticles = Integer.parseInt(args[3]);
+      } catch (NumberFormatException e) {
+        System.err.println("maxArticleCheck is not a number: " + args[3]);
+        printUsageAndExit();
+      }
+    }
+    File propFile = null;
+    if (!"-".equals(args[0])) {
+      propFile = new File(args[0]);
+      if (!propFile.exists() || propFile.isDirectory()) {
+        throw new IOException("file not found or isn't a file: " + propFile.getAbsolutePath());
+      }
+    }
+    prg.run(propFile, args[1], args[2], maxArticles);
+  }
+
+  /** Prints the usage message to stderr and exits with status 1. */
+  private static void printUsageAndExit() {
+    System.err.println("Usage: CheckWikipediaDump <propertyFile> <language> <filename> [maxArticleCheck]");
+    System.err.println("\tpropertyFile a file to set database access properties. Use '-' to print results to stdout.");
+    System.err.println("\tlanguage languagecode like 'en' or 'de'");
+    System.err.println("\tfilename path to unpacked Wikipedia XML dump");
+    System.err.println("\tmaxArticleCheck optional: maximum number of articles to check");
+    System.exit(1);
+  }
+  
+  /**
+   * Checks the articles of the given dump and hands the matches to a
+   * database handler (if <code>propFile</code> is set) or a stdout handler.
+   *
+   * @param propFile database properties, or <code>null</code> to print to stdout
+   * @param language short language code, e.g. "en"
+   * @param textFilename path to the unpacked XML dump
+   * @param maxArticles maximum number of articles to check, 0 for no limit
+   */
+  private void run(File propFile, String language, String textFilename, int maxArticles) 
+      throws IOException, SAXException, ParserConfigurationException {
+    final File file = new File(textFilename);
+    if (!file.exists() || !file.isFile()) {
+      throw new IOException("File doesn't exist or isn't a file: " + textFilename);
+    }
+    final Language lang = Language.getLanguageForShortName(language);
+    if (lang == null) {
+      System.err.println("Language not supported: " + language);
+      System.exit(1);
+    }
+    final JLanguageTool languageTool = new JLanguageTool(lang);
+    languageTool.activateDefaultPatternRules();
+    // useful settings (avoid false alarms) because text extraction
+    // from Wikipedia isn't clean yet:
+    languageTool.disableRule("DE_CASE");    // too many false hits
+    languageTool.disableRule("UNPAIRED_BRACKETS");
+    languageTool.disableRule("UPPERCASE_SENTENCE_START");
+    languageTool.disableRule("WORD_REPEAT_RULE");
+    languageTool.disableRule("COMMA_PARENTHESIS_WHITESPACE");
+    languageTool.disableRule("WHITESPACE_RULE");
+    languageTool.disableRule("EN_QUOTES");        // en
+    languageTool.disableRule("CUDZYSLOW_DRUKARSKI");  // pl
+    languageTool.disableRule("POMIŠLJAJ_1");  // sl
+    languageTool.disableRule("POMIŠLJAJ_2");  // sl
+    languageTool.disableRule("POMIŠLJAJ_3");  // sl
+    System.err.println("These rules are disabled: " + languageTool.getDisabledRules());
+    final Date dumpDate = getDumpDate(file);
+    System.out.println("Dump date: " + dumpDate + ", language: " + language);
+    final BaseWikipediaDumpHandler handler;
+    if (propFile != null) {
+      handler = new DatabaseDumpHandler(languageTool, maxArticles, dumpDate,
+                language, propFile, lang); 
+    } else {
+      handler = new OutputDumpHandler(languageTool, maxArticles, dumpDate,
+              language, lang); 
+    }
+    try {
+      final SAXParserFactory factory = SAXParserFactory.newInstance();
+      final SAXParser saxParser = factory.newSAXParser();
+      saxParser.parse(file, handler);
+    } finally {
+      // release the handler's resources even if parsing fails
+      handler.close();
+    }
+  }
+
+  /**
+   * Extracts the dump date from the second dash-separated part of the file
+   * name, which must have the format <code>yyyyMMdd</code>.
+   *
+   * @throws IOException if the file name has fewer than three dash-separated
+   *         parts or the date cannot be parsed
+   */
+  private Date getDumpDate(File file) throws IOException {
+    final String filename = file.getName();
+    final String[] parts = filename.split("-");
+    if (parts.length < 3) {
+      throw new IOException("Unexpected filename format: " + file.getName());
+    }
+    final SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
+    try {
+      return sdf.parse(parts[1]);
+    } catch (ParseException e) {
+      throw new IOException("Unexpected date format: " + parts[1], e);
+    }
+  }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/DatabaseDumpHandler.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/DatabaseDumpHandler.java
new file mode 100644
index 0000000..a5ad6fb
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/DatabaseDumpHandler.java
@@ -0,0 +1,90 @@
+/*
+ * Created on 04.04.2010
+ */
+package de.danielnaber.languagetool.dev.wikipedia;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.Date;
+import java.util.List;
+import java.util.Properties;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.gui.Tools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * Writes result of LanguageTool check to database. Used for community.languagetool.org.
+ *  
+ * @author Daniel Naber
+ */
+class DatabaseDumpHandler extends BaseWikipediaDumpHandler {
+
+    private final Connection conn;
+
+    DatabaseDumpHandler(JLanguageTool lt, int maxArticles, Date dumpDate, String langCode,
+            File propertiesFile, Language lang) throws IOException {
+    super(lt, maxArticles, dumpDate, langCode, lang);
+    try {
+        final Properties dbProperties = new Properties();
+        dbProperties.load(new FileInputStream(propertiesFile));
+        final String dbDriver = getProperty(dbProperties, "dbDriver");
+        final String dbUrl = getProperty(dbProperties, "dbUrl");
+        final String dbUser = getProperty(dbProperties, "dbUser");
+        final String dbPassword = getProperty(dbProperties, "dbPassword");
+        Class.forName(dbDriver);
+        conn = DriverManager.getConnection(dbUrl, dbUser, dbPassword);
+      } catch (ClassNotFoundException e) {
+        throw new RuntimeException(e);
+      } catch (SQLException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    
+    @Override
+    protected void close() {
+      if (conn != null) {
+        try {
+          conn.close();
+        } catch (SQLException e) {
+          throw new RuntimeException(e);
+        }
+      }
+    }
+
+    private String getProperty(Properties prop, String key) {
+      final String value = prop.getProperty(key);
+      if (value == null) {
+        throw new RuntimeException("required key '" +key+ "' not found in properties");
+      }
+      return value;
+    }
+
+    @Override
+    protected void handleResult(String title, List<RuleMatch> ruleMatches,
+            String text, Language language) throws SQLException {
+      final String sql = "INSERT INTO corpus_match " +
+              "(version, language_code, ruleid, message, error_context, corpus_date, " +
+              "check_date, sourceuri, is_visible) "+
+              "VALUES (0, ?, ?, ?, ?, ?, ?, ?, 1)";
+      final PreparedStatement prepSt = conn.prepareStatement(sql);
+      for (RuleMatch match : ruleMatches) {
+        prepSt.setString(1, language.getShortName());
+        prepSt.setString(2, match.getRule().getId());
+        prepSt.setString(3, match.getMessage());
+        prepSt.setString(4, Tools.getContext(match.getFromPos(),
+              match.getToPos(), text, CONTEXT_SIZE, MARKER_START, MARKER_END));
+        prepSt.setDate(5, new java.sql.Date(dumpDate.getTime()));
+        prepSt.setDate(6, new java.sql.Date(new Date().getTime()));
+        prepSt.setString(7, URL_PREFIX.replaceAll(LANG_MARKER, langCode) + title);
+        prepSt.executeUpdate();
+      }
+    }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/OutputDumpHandler.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/OutputDumpHandler.java
new file mode 100644
index 0000000..3a880fe
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/OutputDumpHandler.java
@@ -0,0 +1,60 @@
+/*
+ * Created on 04.04.2010
+ */
+package de.danielnaber.languagetool.dev.wikipedia;
+
+import java.util.Date;
+import java.util.List;
+
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+import de.danielnaber.languagetool.tools.StringTools;
+
+/**
+ * Writes result of LanguageTool check to stdout.
+ *  
+ * @author Daniel Naber
+ */
+class OutputDumpHandler extends BaseWikipediaDumpHandler {
+
+    OutputDumpHandler(JLanguageTool lt, int maxArticles, Date dumpDate, String langCode,
+            Language lang) {
+      super(lt, maxArticles, dumpDate, langCode, lang);
+    }
+    
+    /** Nothing to release, output goes to stdout. */
+    @Override
+    protected void close() {
+    }
+
+    /**
+     * Prints the article title followed by a numbered entry per match: position
+     * and rule id, message, suggested replacements (if any) and the context.
+     * Prints nothing for articles without matches.
+     */
+    @Override
+    protected void handleResult(String title, List<RuleMatch> ruleMatches,
+            String text, Language language) {
+      if (ruleMatches.size() == 0) {
+        return;
+      }
+      System.out.println("\nTitle: " + title);
+      int matchNumber = 0;
+      for (RuleMatch match : ruleMatches) {
+        matchNumber++;
+        String headline = matchNumber + ".) Line " + (match.getLine() + 1) + ", column "
+          + match.getColumn() + ", Rule ID: " + match.getRule().getId();
+        if (match.getRule() instanceof PatternRule) {
+          headline += "[" + ((PatternRule) match.getRule()).getSubId() + "]";
+        }
+        System.out.println(headline);
+        // show suggestions in plain quotes instead of markup
+        final String message = match.getMessage()
+            .replaceAll("<suggestion>", "'")
+            .replaceAll("</suggestion>", "'");
+        System.out.println("Message: " + message);
+        final List<String> suggestions = match.getSuggestedReplacements();
+        if (!suggestions.isEmpty()) {
+          System.out.println("Suggestion: " + StringTools.listToString(suggestions, "; "));
+        }
+        final String context = StringTools.getContext(match.getFromPos(),
+            match.getToPos(), text, CONTEXT_SIZE);
+        System.out.println(context);
+      }
+    }
+
+}
diff --git a/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/WikipediaTextFilter.java b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/WikipediaTextFilter.java
new file mode 100644
index 0000000..49646e2
--- /dev/null
+++ b/JLanguageTool/src/dev/de/danielnaber/languagetool/dev/wikipedia/WikipediaTextFilter.java
@@ -0,0 +1,52 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2010 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.dev.wikipedia;
+
+import info.bliki.wiki.model.WikiModel;
+
+import org.apache.commons.lang.StringEscapeUtils;
+
+import de.danielnaber.languagetool.TextFilter;
+
+/**
+ * Convert Wikipedia syntax to HTML using Bliki and then try to clean it up (this is
+ * rather ugly).
+ */
+class WikipediaTextFilter implements TextFilter {
+
+  /**
+   * Renders the Wikipedia markup to HTML, strips the markup again with a
+   * series of regular expressions and unescapes HTML entities.
+   *
+   * @param s article text in Wikipedia syntax
+   * @return the text without markup, paragraphs separated by an empty line
+   */
+  public String filter(String s) {
+    // TODO: find general HTML to Text converter?!:
+    final WikiModel wikiModel = new WikiModel("${image}", "${title}");
+    final String html = wikiModel.render(s);
+    final String plainText = html
+        .replaceAll("\\{\\{.*?\\}\\}", "")
+        // closing block elements become paragraph breaks:
+        .replaceAll("</p>", "\n\n")
+        .replaceAll("</dt>", "\n\n")
+        .replaceAll("</dl>", "\n\n")
+        .replaceAll("</h\\d>", "\n\n")
+        // links to Wikipedia pages are dropped including their text:
+        .replaceAll("<a href=\"http://[a-zA-Z-]+\\.wikipedia\\.org/wiki/.*?\">.*?</a>", "")
+        .replaceAll("<.*?>", "")
+        // single line break isn't detected as paragraph in LT by default:
+        .replaceAll("\n\n*", "\n\n");
+    return StringEscapeUtils.unescapeHtml(plainText);
+  }
+
+}
diff --git a/JLanguageTool/src/dev/tools/add_short.xsl b/JLanguageTool/src/dev/tools/add_short.xsl
new file mode 100644
index 0000000..571e41a
--- /dev/null
+++ b/JLanguageTool/src/dev/tools/add_short.xsl
@@ -0,0 +1,59 @@
+<?xml version="1.0" ?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+<!--
+ A simple stylesheet that adds "short" element with category name to grammar files 
+ Copyright (C) 2008 Marcin Miłkowski
+ 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ 
+Note: remove the DOCTYPE declaration before the conversion and re-add it afterwards.
+Otherwise, all default values would be written into the new grammar.xml!
+
+Usage:
+
+java -jar saxon8.jar grammar.xml add_short.xsl >new_grammar.xml
+
+Then rename new_grammar.xml to grammar.xml, after making a backup of grammar.xml
+-->
+
+ <xsl:output method="xml" encoding="utf-8" indent="no"/> <!-- emit UTF-8 XML without re-indenting -->
+ 
+ <xsl:template match="@*|node()"> <!-- identity transform: copy every attribute and node as is -->
+    <xsl:copy>
+      <xsl:apply-templates select="@*|node()"/>
+    </xsl:copy>
+  </xsl:template>
+
+ <xsl:template match="@xml:space"/> <!-- empty template: xml:space attributes are not copied -->
+  
+ <xsl:template match="message"> <!-- copy each message, then append a sibling "short" element -->
+	<xsl:copy>
+		<xsl:apply-templates select="@*|node()"/>
+	</xsl:copy> <!-- the message itself is copied unchanged -->
+	<xsl:text>
+	</xsl:text> <!-- literal line break and tab in front of the new element -->
+	<xsl:element name="short">
+	<xsl:choose>
+	<xsl:when test="name(../..)='rulegroup'"> <!-- grandparent is a rulegroup: use @name three levels up -->
+	<xsl:value-of select="../../../@name"></xsl:value-of>
+	</xsl:when>
+	<xsl:otherwise><xsl:value-of select="../../@name"/></xsl:otherwise> <!-- otherwise use @name two levels up -->
+	</xsl:choose>
+	</xsl:element>	
+</xsl:template>
+
+</xsl:stylesheet>
+\ No newline at end of file
diff --git a/JLanguageTool/src/dev/tools/convert.xsl b/JLanguageTool/src/dev/tools/convert.xsl
new file mode 100644
index 0000000..3e70426
--- /dev/null
+++ b/JLanguageTool/src/dev/tools/convert.xsl
@@ -0,0 +1,50 @@
+<?xml version="1.0" ?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+<!-- XSLT stylesheet to convert grammar.xml <em> elements
+
+ Copyright (C) 2008 Marcin Miłkowski.
+
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+
+Note: it's obsolete and useless for current grammar.xml files. 
+
+usage:
+
+java -jar saxon8.jar grammar.xml convert.xsl
+
+-->
+ <xsl:output method="xml" encoding="utf-8" indent="yes"/> <!-- emit indented UTF-8 XML -->
+ 
+ <xsl:template match="@*|node()"> <!-- identity transform: copy every attribute and node as is -->
+    <xsl:copy>
+      <xsl:apply-templates select="@*|node()"/>
+    </xsl:copy>
+  </xsl:template>
+
+ <xsl:template match="//message/em"> <!-- an em inside a message becomes a suggestion with the em's text -->
+  <suggestion>
+   <xsl:value-of select="text()"/>
+  </suggestion>
+ </xsl:template>
+
+ <xsl:template match="//example/em"> <!-- an em inside an example becomes a marker with the em's text -->
+  <marker>
+   <xsl:value-of select="text()"/>
+  </marker>
+ </xsl:template>
+
+</xsl:stylesheet>
+\ No newline at end of file
diff --git a/JLanguageTool/src/dev/tools/print.xsl b/JLanguageTool/src/dev/tools/print.xsl
new file mode 100644
index 0000000..2e775d6
--- /dev/null
+++ b/JLanguageTool/src/dev/tools/print.xsl
@@ -0,0 +1,200 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+	version="2.0">
+	<!-- XSLT stylesheet to pretty print grammar.xml
+		
+Copyright (C) 2008 Marcin Miłkowski
+
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ 		
+		usage:
+		
+		java -jar saxon8.jar grammar.xml print.xsl
+		
+		This version doesn't work in Firefox, unfortunately...
+		
+	-->
+	<xsl:output method="html" encoding="UTF-8" indent="no" /> <!-- serialize the result as UTF-8 HTML -->
+
+	<xsl:template match="text()" /> <!-- text nodes produce no output unless a more specific template matches -->
+
+	<xsl:template match="*"> <!-- fallback for any element: only process its child elements, sorted by @name -->
+			<xsl:apply-templates select="*">
+				<xsl:sort select="@name"/>
+			</xsl:apply-templates>
+	</xsl:template>
+	
+	<xsl:template match="//category"> <!-- one category: an initially hidden div with its rules, then a heading outside that div -->
+		<xsl:variable name="category_name" select="@name"/>
+		<xsl:variable name="cat_id" select="generate-id()"/> <!-- generated id ties the toggleDiv calls to the div -->
+		<xsl:element name="div">
+		<xsl:attribute name="id"><xsl:copy-of select="$cat_id"/></xsl:attribute>
+		<xsl:attribute name="style">display:none</xsl:attribute> <!-- the div starts out hidden -->
+		<h4>		
+		<xsl:element name="a">
+		<xsl:attribute name="href">javascript:;</xsl:attribute>
+		<xsl:attribute name="onmousedown">toggleDiv('<xsl:copy-of select="$cat_id"/>');</xsl:attribute> <!-- pressing the name toggles this div -->
+		<xsl:value-of select="$category_name"/>
+		</xsl:element>
+        (<xsl:value-of select="count(rule[@id!=''])+count(rulegroup[@id!=''])"/>)
+		</h4> <!-- the number in parentheses counts child rule and rulegroup elements with a non-empty id -->
+		<ol>			
+			<xsl:apply-templates select="*">
+				<xsl:sort select="@name"/>
+			</xsl:apply-templates>
+		</ol>
+		</xsl:element> <!-- end of the hidden div -->
+		<h4> <!-- the same heading again, outside the div, so it is not hidden with it -->
+		<xsl:element name="a">
+		<xsl:attribute name="href">javascript:;</xsl:attribute>
+		<xsl:attribute name="onmousedown">toggleDiv('<xsl:copy-of select="$cat_id"/>');</xsl:attribute>
+		<xsl:value-of select="$category_name"/>
+		</xsl:element>
+        (<xsl:value-of select="count(rule[@id!=''])+count(rulegroup[@id!=''])"/>)
+		</h4>
+	</xsl:template>
+
+
+	<xsl:template match="//rule[@id!='']"> <!-- a rule with a non-empty id: list item showing its name -->
+		<li>
+			<xsl:value-of select="@name" />			
+			<ul> <!-- kept inside the li: a ul placed directly under the enclosing list is invalid HTML -->
+				<xsl:apply-templates select="*" />
+			</ul>
+		</li>
+	</xsl:template>
+
+	<xsl:template match="//rulegroup"> <!-- a rulegroup: list item showing its name -->
+		<li>
+			<xsl:value-of select="@name" />
+			<ul> <!-- kept inside the li: a ul placed directly under the enclosing list is invalid HTML -->
+				<xsl:apply-templates select="*" />
+			</ul>
+		</li>
+	</xsl:template>
+
+
+	<xsl:template match="//rule/example[@type='incorrect']"> <!-- one list item per incorrect example of a rule -->
+		<li>
+			<xsl:apply-templates select="*|text()" /> <br/> <!-- the example sentence itself, then a line break -->
+			<xsl:if test="../short/text()!=''"> <!-- print the rule's short text, if any, followed by a period -->
+			<xsl:value-of select="../short/text()"/>. 
+			</xsl:if> 
+			<xsl:if test="@correction !=''"> <!-- everything below only runs when the example has a correction -->
+			<xsl:choose>
+			<xsl:when test="not(contains(@correction, '|')) and not(contains(../message/text()[1], '\')) and count(../message/text()) &lt; 3">
+			<xsl:copy-of select="../message/text()[1]"/>
+			<strong style="color: #339900;"><xsl:value-of select="@correction"/></strong> <!-- single correction, placed between the first two message text nodes -->
+			<xsl:copy-of select="../message/text()[2]"/>
+			</xsl:when>
+			<xsl:otherwise>
+<!--
+Remaining problem: replace \1 in message text with pattern/token[1]
+
+ 
+			<xsl:choose>
+			<xsl:when test="//rules[@lang='pl']">Poprawnie: </xsl:when>
+			<xsl:when test="//rules[@lang='en']">Correctly: </xsl:when>
+			<xsl:when test="//rules[@lang='de']">Korrekt: </xsl:when>
+			<xsl:when test="//rules[@lang='fr']">Correctement : </xsl:when>
+			<xsl:when test="//rules[@lang='nl']">Correct: </xsl:when>
+			<xsl:when test="//rules[@lang='es']">Correctamente: </xsl:when>
+			</xsl:choose>
+				
+				<strong style="color: #339900;">
+					<xsl:value-of select="@correction"/>
+				</strong>
+				 --> 
+											
+				<xsl:variable name="message" select="../message/text()"/> <!-- all text nodes of the message -->
+					<xsl:for-each select="tokenize(@correction,'\|')"> <!-- one pass per '|' separated alternative; tokenize() needs XPath 2.0 -->
+					<xsl:variable name="message_cnt" select="position()"/>					
+					<xsl:value-of select="$message[$message_cnt]"/> <!-- message text node at the same position as this alternative -->
+					<strong style="color: #339900;">					
+					<xsl:value-of select="."/>
+					</strong>
+					<xsl:if test="position()=last()"> <!-- after the last alternative, also print the message text node that follows -->
+						<xsl:variable name="last" select="last()+1"/>
+						<xsl:value-of select="$message[$last]"/>
+					</xsl:if>										  
+					</xsl:for-each>				
+			 </xsl:otherwise>
+			 </xsl:choose>
+			</xsl:if>
+		</li>
+	</xsl:template>
+
+	<xsl:template match="//rule/example[@type='incorrect']/text()"> <!-- text directly inside an incorrect example is copied to the output -->
+		<xsl:copy-of select="." />
+	</xsl:template>
+
+	<xsl:template match="//rule/example[@type='incorrect']/marker"> <!-- marker inside an incorrect example: its text in bold red -->
+		<strong style="color: rgb(255, 0, 0);">
+			<xsl:value-of select="./text()" />
+		</strong>
+	</xsl:template>
+	
+	<xsl:template match="//rules"> <!-- document element: page skeleton, summary counts, then the child elements -->
+	<html>
+	<head>
+	<meta http-equiv="content-type" content="text/html; charset=UTF-8"/> <!-- meta belongs inside head, not between html and head -->
+	<script language="javascript">
+	<xsl:text>
+	  function toggleDiv(divid){
+    	if(document.getElementById(divid).style.display == 'none'){
+	      document.getElementById(divid).style.display = 'block';
+    	}else{
+	      document.getElementById(divid).style.display = 'none';
+    	}
+	  }
+	 </xsl:text>
+	</script>
+	</head>
+	<body>
+        <noscript><p><strong>Note:</strong> this page requires Javascript to work</p></noscript>
+		<xsl:choose> <!-- labels are Polish when lang='pl' and English otherwise -->
+		<xsl:when test="//rules[@lang='pl']">Łączna liczba reguł: </xsl:when>
+		<xsl:otherwise>Total number of rules: </xsl:otherwise>
+		</xsl:choose>
+		<strong>
+			<xsl:value-of select="count(//rule)"/> <!-- every rule element anywhere in the document -->
+		</strong>		
+		<br/>
+		<xsl:choose>
+		<xsl:when test="//rules[@lang='pl']">W tym z podpowiedziami: </xsl:when>
+		<xsl:otherwise>Rules with suggestions: </xsl:otherwise>
+		</xsl:choose>
+		<strong>
+			<xsl:value-of select="count(//message[suggestion!=''])"/> <!-- messages with a non-empty suggestion child -->
+		</strong>
+		<br/>
+		<xsl:choose>
+		<xsl:when test="//rules[@lang='pl']">Liczba widocznych typów reguł: </xsl:when>
+		<xsl:otherwise>Total number of visible rule types: </xsl:otherwise>
+		</xsl:choose>
+		<strong>
+			<xsl:value-of select="count(//rule[@id!=''])+count(//rulegroup[@id!=''])"/> <!-- rules and rulegroups with a non-empty id -->
+		</strong>		
+		<br/>
+	
+			<xsl:apply-templates select="*">
+				<xsl:sort select="@name"/>
+			</xsl:apply-templates>
+	</body>
+	</html>
+	</xsl:template>	
+	
+</xsl:stylesheet>
+\ No newline at end of file
diff --git a/JLanguageTool/src/dev/tools/stats.awk b/JLanguageTool/src/dev/tools/stats.awk
new file mode 100644
index 0000000..aa1760e
--- /dev/null
+++ b/JLanguageTool/src/dev/tools/stats.awk
@@ -0,0 +1,62 @@
+#Script to sort rule matches from LanguageTool
+#Usage: gawk -f stats.awk <file_created_by_LanguageTool>
+#(c) 2008, Marcin Milkowski
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+# USA
+
+/^[0-9]+\.\)/ { # start of a match report: a line beginning with "<number>.)"
+gsub(/^.*ID: /,"") # drop everything up to and including the last "ID: "; the rest of $0 is used as the rule ID
+rule_cnt[$0]++ # one more match for this rule
+current_rule=$0 # the following lines are collected under this rule
+rulematch=1 # remembered so that END prints the report header
+linecnt=0 # no Message/Suggestion line seen yet for this match
+}
+/^(Message: |Suggestion:)/ { # message and suggestion lines of the current match
+comments[current_rule]= comments[current_rule] "\n" $0 # append the line to the text collected for the rule
+linecnt++ # enables collection of the context lines that follow
+}
+!/^($|Message: |Suggestion:|Time:)/ && !/ \^/ { # any other non-empty line: not "Time:", and without a " ^" in it
+if (linecnt>0) # only after a Message/Suggestion line; the rewritten header line gets here with linecnt==0
+comments[current_rule]= comments[current_rule] "\n" $0
+}
+/^ / && / \^/ { # line starting with a space and containing " ^" (presumably the error position marker)
+comments[current_rule]= comments[current_rule] "\n" $0 "\n" # appended with an extra line break after it
+}
+END { # print the collected matches per rule, most frequent rule first
+if (rulematch==1) { # header only when at least one match was seen
+print "LanguageTool rule matches in descending order"
+print "============================================="
+print ""
+}
+z = asorti(rule_cnt, rule_names) # gawk: rule IDs sorted into rule_names[1..z]
+#for (i = 1; i <= z; i++)
+ #   print i " " rule_names[i]
+n = asort(rule_cnt, rules) # gawk: match counts sorted ascending into rules[1..n]
+
+for (i = z; i >= 1; i--) { # walk the sorted counts from highest to lowest
+
+	for (j = 1; j <= z; j++) { # look for rules whose count equals rules[i]
+#		print j " " rule_names[j] " => " rule_cnt[rule_names[j]]
+		if (rule_cnt[rule_names[j]]==rules[i] \
+			&& printed[rule_names[j]]!="done") {				
+				printed[rule_names[j]]="done" # each rule is printed once even when several rules share a count
+				rule=rule_names[j]
+	print "Rule ID: " rule ", matches: " rule_cnt[rule]
+	print comments[rule]
+	print "============="
+		}
+	}
+}	
+}
+\ No newline at end of file