1 files changed, 502 insertions, 0 deletions
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java
new file mode 100644
index 0000000..a1dfeaa
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java
@@ -0,0 +1,502 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Daniel Naber
+ */
+public class PatternRuleTest extends TestCase {
+
+  private static JLanguageTool langTool;
+
+  private static final Pattern PROBABLE_REGEX = Pattern.compile("[^\\[\\]\\*\\+\\|\\^\\{\\}\\?][\\[\\]\\*\\+\\|\\^\\{\\}\\?]|\\\\[^0-9]|\\(.+\\)|\\..");
+
+  private static final Pattern CASE_REGEX = Pattern.compile("\\[(.)(.)\\]");
+
+  
+  @Override
+  public void setUp() throws IOException {
+    if (langTool == null) {
+      langTool = new JLanguageTool(Language.ENGLISH);
+    }
+  }
+
+  public void testGrammarRulesFromXML() throws IOException {
+    testGrammarRulesFromXML(null, false);
+  }
+
+  private void testGrammarRulesFromXML(final Set<Language> ignoredLanguages,
+      final boolean verbose) throws IOException {
+    for (final Language lang : Language.LANGUAGES) {
+      if (ignoredLanguages != null && ignoredLanguages.contains(lang)) {
+        if (verbose) {
+          System.out.println("Ignoring tests for " + lang.getName());
+        }
+        continue;
+      }
+      if (verbose) {
+        System.out.println("Running tests for " + lang.getName() + "...");
+      }
+      final PatternRuleLoader ruleLoader = new PatternRuleLoader();
+      final JLanguageTool languageTool = new JLanguageTool(lang);
+      final String name = "/" + lang.getShortName() + "/grammar.xml";
+      final List<PatternRule> rules = ruleLoader.getRules(JLanguageTool.getDataBroker().
+    		  getFromRulesDirAsStream(name), name);
+      warnIfRegexpSyntax(rules, lang);
+      testGrammarRulesFromXML(rules, languageTool, lang);
+    }
+  }
+
+  // TODO: probably this would be more useful for exceptions
+  // instead of adding next methods to PatternRule
+  // we can probably validate using XSD and specify regexes straight there
+  private void warnIfRegexpSyntax(final List<PatternRule> rules,
+      final Language lang) {
+    for (final PatternRule rule : rules) {
+      int i = 0;
+      for (final Element element : rule.getElements()) {
+        i++;
+        warnIfElementNotKosher(element, lang, rule.getId());
+        if (element.getExceptionList() != null) {
+        for (final Element exception: element.getExceptionList()) {          
+          warnIfElementNotKosher(exception, lang, rule.getId() 
+              + " (exception in token [" + i + "]:" + element +") ");          
+        }
+        }
+      }
+    }
+  }
+
+  private void warnIfElementNotKosher(final Element element, 
+      final Language lang, final String ruleId) {
+    if (!element.isRegularExpression()
+        && (PROBABLE_REGEX.matcher(element.getString())
+            .find())) {
+      System.err.println("The " + lang.toString() + " rule: "
+          + ruleId + " contains element " + "\"" + element
+          + "\" that is not marked as regular expression"
+          + " but probably is one.");
+    }
+    if (element.isRegularExpression() && "".equals(element.getString())) {
+      System.err.println("The " + lang.toString() + " rule: "
+          + ruleId + " contains an empty string element " + "\"" + element
+          + "\" that is marked as regular expression (don't look at the POS tag, it might be OK).");
+    } else if (element.isRegularExpression()
+        && !PROBABLE_REGEX.matcher(element.getString())
+            .find()) {
+      System.err.println("The " + lang.toString() + " rule: "
+          + ruleId + " contains element " + "\"" + element
+          + "\" that is marked as regular expression"
+          + " but probably is not one."); 
+      }   
+          
+    if (element.isInflected()
+     && "".equals(element.getString())) {
+      System.err.println("The " + lang.toString() + " rule: "
+          + ruleId + " contains element " + "\"" + element
+          + "\" that is marked as inflected"
+          + " but is empty, so the attribute is redundant.");
+    }
+
+    if (element.isRegularExpression() && !element.getCaseSensitive()) {
+      Matcher matcher = CASE_REGEX.matcher(element.getString());
+      if (matcher.find()) {
+        final String letter1 = matcher.group(1);
+        final String letter2 = matcher.group(2);
+
+        if (!letter1.equals(letter2) 
+          && letter1.toLowerCase().equals(letter2.toLowerCase())) {
+          System.err.println("The " + lang.toString() + " rule: "
+             + ruleId + " contains regexp part [" + letter1 + letter2
+             + "] which is useless without case_sensitive=\"yes\".");
+        }
+      }
+    }
+
+    if (element.isRegularExpression() && element.getString().contains("|")) {
+      final String[] groups = element.getString().split("\\)");         
+      final boolean caseSensitive = element.getCaseSensitive();
+      for (final String group : groups) {        
+        final String[] alt = group.split("\\|");
+        final Set<String> partSet = new HashSet<String>();
+        final Set<String> partSetNoCase = new HashSet<String>();
+        for (String part : alt) {
+          String partNoCase = caseSensitive ? part : part.toLowerCase();
+          if (partSetNoCase.contains(partNoCase)) {
+            if (partSet.contains(part)) {
+              // Duplicate disjunction parts "foo|foo".
+              System.err.println("The " + lang.toString() + " rule : "
+                  + ruleId + " contains duplicated disjunction part (" 
+                  + part + ") within the element " + "\"" + element + "\".");
+            } else {
+              // Duplicate disjunction parts "Foo|foo" since element ignores case.
+              System.err.println("The " + lang.toString() + " rule : "
+                  + ruleId + " contains duplicated non case sensitive disjunction part (" 
+                  + part + ") within the element " + "\"" + element + "\". Did you "
+                  + "forget case_sensitive=\"yes\"?");
+            }
+          }    
+          partSetNoCase.add(partNoCase);
+          partSet.add(part);
+        }
+      }
+    }
+  }
+  
+  
+  private void testGrammarRulesFromXML(final List<PatternRule> rules,
+      final JLanguageTool languageTool, final Language lang) throws IOException {
+    int noSuggestionCount = 0;
+    final HashMap<String, PatternRule> complexRules = new HashMap<String, PatternRule>();
+    for (final PatternRule rule : rules) {
+      final List<String> goodSentences = rule.getCorrectExamples();
+      for (String goodSentence : goodSentences) {
+        // enable indentation use
+        goodSentence = goodSentence.replaceAll("[\\n\\t]+", "");
+        goodSentence = cleanXML(goodSentence);
+        assertTrue(goodSentence.trim().length() > 0);
+        assertFalse(lang + ": Did not expect error in: " + goodSentence
+            + " (Rule: " + rule + ")", match(rule, goodSentence, languageTool));
+      }
+      final List<IncorrectExample> badSentences = rule.getIncorrectExamples();
+      for (IncorrectExample origBadExample : badSentences) {
+        // enable indentation use
+        String origBadSentence = origBadExample.getExample().replaceAll(
+            "[\\n\\t]+", "");
+        final List<String> suggestedCorrection = origBadExample
+            .getCorrections();
+        final int expectedMatchStart = origBadSentence.indexOf("<marker>");
+        final int expectedMatchEnd = origBadSentence.indexOf("</marker>")
+            - "<marker>".length();
+        if (expectedMatchStart == -1 || expectedMatchEnd == -1) {
+          fail(lang
+              + ": No error position markup ('<marker>...</marker>') in bad example in rule "
+              + rule);
+        }
+        final String badSentence = cleanXML(origBadSentence);
+        assertTrue(badSentence.trim().length() > 0);
+        RuleMatch[] matches = getMatches(rule, badSentence, languageTool);
+        if (!rule.isWithComplexPhrase()) {
+          assertTrue(lang + ": Did expect one error in: \"" + badSentence
+              + "\" (Rule: " + rule + "), got " + matches.length
+              + ". Additional info:" + rule.getMessage(), matches.length == 1);
+          assertEquals(lang
+              + ": Incorrect match position markup (start) for rule " + rule,
+              expectedMatchStart, matches[0].getFromPos());
+          assertEquals(lang
+              + ": Incorrect match position markup (end) for rule " + rule,
+              expectedMatchEnd, matches[0].getToPos());
+          // make sure suggestion is what we expect it to be
+          if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+            assertTrue("You specified a correction but your message has no suggestions in rule " + rule,
+              rule.getMessage().contains("<suggestion>")    
+            );
+            assertTrue(lang + ": Incorrect suggestions: "
+                + suggestedCorrection.toString() + " != "
+                + matches[0].getSuggestedReplacements() + " for rule " + rule,
+                suggestedCorrection.equals(matches[0]
+                    .getSuggestedReplacements()));
+          }
+          // make sure the suggested correction doesn't produce an error:
+          if (matches[0].getSuggestedReplacements().size() > 0) {
+            final int fromPos = matches[0].getFromPos();
+            final int toPos = matches[0].getToPos();
+            for (final String repl : matches[0].getSuggestedReplacements()) {
+              final String fixedSentence = badSentence.substring(0, fromPos)
+                  + repl + badSentence.substring(toPos);
+              matches = getMatches(rule, fixedSentence, languageTool);
+              if (matches.length > 0) {
+                  fail("Incorrect input:\n"
+                          + "  " + badSentence
+                  		  + "\nCorrected sentence:\n"
+                          + "  " + fixedSentence
+                          + "\nBy Rule:\n"
+                          + "  " + rule
+                          + "\nThe correction triggered an error itself:\n"
+                          + "  " + matches[0] + "\n");
+              }
+            }
+          } else {
+            noSuggestionCount++;
+          }
+        } else { // for multiple rules created with complex phrases
+
+          matches = getMatches(rule, badSentence, languageTool);
+          if (matches.length == 0
+              && !complexRules.containsKey(rule.getId() + badSentence)) {
+            complexRules.put(rule.getId() + badSentence, rule);
+          }
+
+          if (matches.length != 0) {
+            complexRules.put(rule.getId() + badSentence, null);
+            assertTrue(lang + ": Did expect one error in: \"" + badSentence
+                + "\" (Rule: " + rule + "), got " + matches.length,
+                matches.length == 1);
+            assertEquals(lang
+                + ": Incorrect match position markup (start) for rule " + rule,
+                expectedMatchStart, matches[0].getFromPos());
+            assertEquals(lang
+                + ": Incorrect match position markup (end) for rule " + rule,
+                expectedMatchEnd, matches[0].getToPos());
+            // make sure suggestion is what we expect it to be
+            if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+              assertTrue(
+                  lang + ": Incorrect suggestions: "
+                      + suggestedCorrection.toString() + " != "
+                      + matches[0].getSuggestedReplacements() + " for rule "
+                      + rule, suggestedCorrection.equals(matches[0]
+                      .getSuggestedReplacements()));
+            }
+            // make sure the suggested correction doesn't produce an error:
+            if (matches[0].getSuggestedReplacements().size() > 0) {
+              final int fromPos = matches[0].getFromPos();
+              final int toPos = matches[0].getToPos();
+              for (final String repl : matches[0].getSuggestedReplacements()) {
+                final String fixedSentence = badSentence.substring(0, fromPos)
+                    + repl + badSentence.substring(toPos);
+                matches = getMatches(rule, fixedSentence, languageTool);
+                assertEquals("Corrected sentence for rule " + rule
+                    + " triggered error: " + fixedSentence, 0, matches.length);
+              }
+            } else {
+              noSuggestionCount++;
+            }
+          }
+        }
+
+      }
+    }
+    if (!complexRules.isEmpty()) {
+      final Set<String> set = complexRules.keySet();
+      final List<PatternRule> badRules = new ArrayList<PatternRule>();
+      final Iterator<String> iter = set.iterator();
+      while (iter.hasNext()) {
+        final PatternRule badRule = complexRules.get(iter.next());
+        if (badRule != null) {
+          badRule.notComplexPhrase();
+          badRule
+              .setMessage("The rule contains a phrase that never matched any incorrect example.");
+          badRules.add(badRule);
+        }
+      }
+      if (!badRules.isEmpty()) {
+        testGrammarRulesFromXML(badRules, languageTool, lang);
+      }
+    }
+  }
+
+  protected String cleanXML(final String str) {
+    return str.replaceAll("<([^<].*?)>", "");
+  }
+
+  private boolean match(final Rule rule, final String sentence,
+      final JLanguageTool languageTool) throws IOException {
+    final AnalyzedSentence text = languageTool.getAnalyzedSentence(sentence);
+    final RuleMatch[] matches = rule.match(text);
+    return matches.length > 0;
+  }
+
+  private RuleMatch[] getMatches(final Rule rule, final String sentence,
+      final JLanguageTool languageTool) throws IOException {
+    final AnalyzedSentence text = languageTool.getAnalyzedSentence(sentence);
+    final RuleMatch[] matches = rule.match(text);
+    /*
+     * for (int i = 0; i < matches.length; i++) {
+     * System.err.println(matches[i]); }
+     */
+    return matches;
+  }
+
+  public void testUppercasingSuggestion() throws IOException {
+    final JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+    langTool.activateDefaultPatternRules();
+    final List<RuleMatch> matches = langTool
+        .check("Were are in the process of ...");
+    assertEquals(1, matches.size());
+    final RuleMatch match = matches.get(0);
+    final List<String> sugg = match.getSuggestedReplacements();
+    assertEquals(2, sugg.size());
+    assertEquals("Where", sugg.get(0));
+    assertEquals("We", sugg.get(1));
+  }
+
+  public void testRule() throws IOException {
+    PatternRule pr;
+    RuleMatch[] matches;
+
+    pr = makePatternRule("one");
+    matches = pr
+        .match(langTool.getAnalyzedSentence("A non-matching sentence."));
+    assertEquals(0, matches.length);
+    matches = pr.match(langTool
+        .getAnalyzedSentence("A matching sentence with one match."));
+    assertEquals(1, matches.length);
+    assertEquals(25, matches[0].getFromPos());
+    assertEquals(28, matches[0].getToPos());
+    // these two are not set if the rule is called standalone (not via
+    // JLanguageTool):
+    assertEquals(-1, matches[0].getColumn());
+    assertEquals(-1, matches[0].getLine());
+    assertEquals("ID1", matches[0].getRule().getId());
+    assertTrue(matches[0].getMessage().equals("user visible message"));
+    assertTrue(matches[0].getShortMessage().equals("short comment"));
+    matches = pr.match(langTool
+        .getAnalyzedSentence("one one and one: three matches"));
+    assertEquals(3, matches.length);
+
+    pr = makePatternRule("one two");
+    matches = pr.match(langTool.getAnalyzedSentence("this is one not two"));
+    assertEquals(0, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("this is two one"));
+    assertEquals(0, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("this is one two three"));
+    assertEquals(1, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("one two"));
+    assertEquals(1, matches.length);
+
+    pr = makePatternRule("one|foo|xxxx two", false, true);
+    matches = pr.match(langTool.getAnalyzedSentence("one foo three"));
+    assertEquals(0, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("one two"));
+    assertEquals(1, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("foo two"));
+    assertEquals(1, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("one foo two"));
+    assertEquals(1, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("y x z one two blah foo"));
+    assertEquals(1, matches.length);
+
+    pr = makePatternRule("one|foo|xxxx two|yyy", false, true);
+    matches = pr.match(langTool.getAnalyzedSentence("one, yyy"));
+    assertEquals(0, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("one yyy"));
+    assertEquals(1, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("xxxx two"));
+    assertEquals(1, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("xxxx yyy"));
+    assertEquals(1, matches.length);
+  }
+
+  private PatternRule makePatternRule(final String s) {
+    return makePatternRule(s, false, false);
+  }
+
+  private PatternRule makePatternRule(final String s,
+      final boolean caseSensitive, final boolean regex) {
+    final List<Element> elems = new ArrayList<Element>();
+    final String[] parts = s.split(" ");
+    boolean pos = false;
+    Element se = null;
+    for (final String element : parts) {
+      if (element.equals("SENT_START")) {
+        pos = true;
+      }
+      if (!pos) {
+        se = new Element(element, caseSensitive, regex, false);
+      } else {
+        se = new Element("", caseSensitive, regex, false);
+      }
+      if (pos) {
+        se.setPosElement(element, false, false);
+      }
+      elems.add(se);
+      pos = false;
+    }
+    final PatternRule rule = new PatternRule("ID1", Language.ENGLISH, elems,
+        "test rule", "user visible message", "short comment");
+    return rule;
+  }
+
+  public void testSentenceStart() throws IOException {
+    PatternRule pr;
+    RuleMatch[] matches;
+
+    pr = makePatternRule("SENT_START One");
+    matches = pr.match(langTool.getAnalyzedSentence("Not One word."));
+    assertEquals(0, matches.length);
+    matches = pr.match(langTool.getAnalyzedSentence("One word."));
+    assertEquals(1, matches.length);
+  }
+
+  private static String callFormatMultipleSynthesis(final String[] suggs,
+      final String left, final String right) throws IllegalArgumentException,
+      SecurityException, InvocationTargetException, IllegalAccessException,
+      NoSuchMethodException {
+    Class[] argClasses = { String[].class, String.class, String.class };
+    Object[] argObjects = { suggs, left, right };
+    return TestTools.callStringStaticMethod(PatternRule.class,
+        "formatMultipleSynthesis", argClasses, argObjects);
+  }
+
+  /* test private methods as well */
+  public void testformatMultipleSynthesis() throws IllegalArgumentException,
+      SecurityException, InvocationTargetException, IllegalAccessException,
+      NoSuchMethodException {
+    final String[] suggArray = { "blah blah", "foo bar" };
+
+    assertEquals(
+        "This is how you should write: <suggestion>blah blah</suggestion>, <suggestion>foo bar</suggestion>.",
+
+        callFormatMultipleSynthesis(suggArray,
+            "This is how you should write: <suggestion>", "</suggestion>."));
+
+    final String[] suggArray2 = { "test", " " };
+
+    assertEquals(
+        "This is how you should write: <suggestion>test</suggestion>, <suggestion> </suggestion>.",
+
+        callFormatMultipleSynthesis(suggArray2,
+            "This is how you should write: <suggestion>", "</suggestion>."));
+  }
+
+  /**
+   * Test XML patterns, as a help for people developing rules that are not
+   * programmers.
+   */
+  public static void main(final String[] args) throws IOException {
+    final PatternRuleTest prt = new PatternRuleTest();
+    System.out.println("Running XML pattern tests...");
+    prt.setUp();
+    final Set<Language> ignoredLanguages = new HashSet<Language>();
+    // ignoredLanguages.add(Language.CZECH); // has no XML rules yet
+    prt.testGrammarRulesFromXML(ignoredLanguages, true);
+    System.out.println("Tests successful.");
+  }
+
+}