summaryrefslogtreecommitdiffstats
path: root/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java
diff options
context:
space:
mode:
Diffstat (limited to 'JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java')
-rw-r--r--JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java502
1 files changed, 502 insertions, 0 deletions
diff --git a/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java
new file mode 100644
index 0000000..a1dfeaa
--- /dev/null
+++ b/JLanguageTool/src/test/de/danielnaber/languagetool/rules/patterns/PatternRuleTest.java
@@ -0,0 +1,502 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import junit.framework.TestCase;
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.TestTools;
+import de.danielnaber.languagetool.rules.IncorrectExample;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+
+/**
+ * @author Daniel Naber
+ */
+public class PatternRuleTest extends TestCase {
+
+ /** English tool shared by the tests that match hand-built rules; created on first use in setUp(). */
+ private static JLanguageTool langTool;
+
+ /**
+  * Heuristic for "this text looks like a regular expression". It finds any of: a character that
+  * is not one of [ ] * + | ^ { } ? directly followed by one that is; a backslash followed by a
+  * non-digit; a parenthesised group with at least one character; a literal dot followed by any
+  * character.
+  */
+ private static final Pattern PROBABLE_REGEX = Pattern.compile("[^\\[\\]\\*\\+\\|\\^\\{\\}\\?][\\[\\]\\*\\+\\|\\^\\{\\}\\?]|\\\\[^0-9]|\\(.+\\)|\\..");
+
+ /** Finds a two-character bracket class such as [Aa]; the two characters are groups 1 and 2. */
+ private static final Pattern CASE_REGEX = Pattern.compile("\\[(.)(.)\\]");
+
+
+  /**
+   * Creates the shared English {@code JLanguageTool} the first time it is needed; later calls
+   * leave the existing instance in place.
+   *
+   * @throws IOException if the tool cannot be created
+   */
+  @Override
+  public void setUp() throws IOException {
+    if (langTool != null) {
+      return;
+    }
+    langTool = new JLanguageTool(Language.ENGLISH);
+  }
+
+  /**
+   * JUnit entry point: checks the grammar rules of every language, skipping none and printing
+   * no progress output.
+   *
+   * @throws IOException if loading or running the rules fails
+   */
+  public void testGrammarRulesFromXML() throws IOException {
+    final Set<Language> nothingIgnored = null;
+    final boolean verbose = false;
+    testGrammarRulesFromXML(nothingIgnored, verbose);
+  }
+
+  /**
+   * Loads {@code /<short name>/grammar.xml} for each entry of {@code Language.LANGUAGES}, prints
+   * regexp-syntax warnings for its rules and then runs the rules against their own examples.
+   *
+   * @param ignoredLanguages languages to skip; {@code null} means skip none
+   * @param verbose whether to print one progress line per language to {@code System.out}
+   * @throws IOException if loading or running the rules fails
+   */
+  private void testGrammarRulesFromXML(final Set<Language> ignoredLanguages,
+      final boolean verbose) throws IOException {
+    for (final Language lang : Language.LANGUAGES) {
+      final boolean ignored = ignoredLanguages != null && ignoredLanguages.contains(lang);
+      if (verbose) {
+        System.out.println(ignored
+            ? "Ignoring tests for " + lang.getName()
+            : "Running tests for " + lang.getName() + "...");
+      }
+      if (ignored) {
+        continue;
+      }
+      final PatternRuleLoader loader = new PatternRuleLoader();
+      final JLanguageTool toolForLang = new JLanguageTool(lang);
+      final String grammarFile = "/" + lang.getShortName() + "/grammar.xml";
+      final List<PatternRule> loadedRules = loader.getRules(
+          JLanguageTool.getDataBroker().getFromRulesDirAsStream(grammarFile), grammarFile);
+      warnIfRegexpSyntax(loadedRules, lang);
+      testGrammarRulesFromXML(loadedRules, toolForLang, lang);
+    }
+  }
+
+  /**
+   * Runs {@code warnIfElementNotKosher} on every element of every rule and on every exception
+   * attached to those elements. For exceptions the rule id passed on is extended with the
+   * 1-based position of the owning token and the token itself.
+   *
+   * TODO: this kind of check might be more useful for exceptions. Instead of adding further
+   * methods to PatternRule, the grammar file could probably be validated with XSD, with the
+   * regexes specified directly in the schema.
+   *
+   * @param rules rules whose elements are inspected
+   * @param lang language of the rules, used in the warning text
+   */
+  private void warnIfRegexpSyntax(final List<PatternRule> rules,
+      final Language lang) {
+    for (final PatternRule rule : rules) {
+      int tokenNumber = 0;
+      for (final Element token : rule.getElements()) {
+        tokenNumber++;
+        warnIfElementNotKosher(token, lang, rule.getId());
+        if (token.getExceptionList() == null) {
+          continue;
+        }
+        for (final Element exception : token.getExceptionList()) {
+          final String idWithContext = rule.getId()
+              + " (exception in token [" + tokenNumber + "]:" + token + ") ";
+          warnIfElementNotKosher(exception, lang, idWithContext);
+        }
+      }
+    }
+  }
+
+  /**
+   * Prints a warning to {@code System.err} for each suspicious combination of attributes and
+   * text found on one pattern element. Nothing is asserted here, so a warning never fails a
+   * test. The checks are:
+   * <ul>
+   * <li>text that matches {@code PROBABLE_REGEX} but is not marked as a regular expression;</li>
+   * <li>an element marked as a regular expression whose text is empty, or whose text does not
+   * match {@code PROBABLE_REGEX};</li>
+   * <li>an element marked as inflected whose text is empty;</li>
+   * <li>any two-character class such as {@code [Aa]} in a regular expression that is not case
+   * sensitive;</li>
+   * <li>an alternative that occurs twice in one {@code a|b|c} list, either literally or, when
+   * the element is not case sensitive, after lower-casing.</li>
+   * </ul>
+   *
+   * @param element the token or exception to inspect
+   * @param lang language of the rule, used only in the messages
+   * @param ruleId rule id, possibly with extra context appended, used only in the messages
+   */
+  private void warnIfElementNotKosher(final Element element,
+      final Language lang, final String ruleId) {
+    final String text = element.getString();
+    final boolean markedAsRegex = element.isRegularExpression();
+    final boolean looksLikeRegex = PROBABLE_REGEX.matcher(text).find();
+    final boolean emptyText = "".equals(text);
+    final String rulePrefix = "The " + lang.toString() + " rule: " + ruleId;
+    final String quotedElement = "\"" + element + "\"";
+
+    if (!markedAsRegex && looksLikeRegex) {
+      System.err.println(rulePrefix + " contains element " + quotedElement
+          + " that is not marked as regular expression"
+          + " but probably is one.");
+    }
+    if (markedAsRegex && emptyText) {
+      System.err.println(rulePrefix + " contains an empty string element " + quotedElement
+          + " that is marked as regular expression (don't look at the POS tag, it might be OK).");
+    } else if (markedAsRegex && !looksLikeRegex) {
+      System.err.println(rulePrefix + " contains element " + quotedElement
+          + " that is marked as regular expression"
+          + " but probably is not one.");
+    }
+
+    if (element.isInflected() && emptyText) {
+      System.err.println(rulePrefix + " contains element " + quotedElement
+          + " that is marked as inflected"
+          + " but is empty, so the attribute is redundant.");
+    }
+
+    if (markedAsRegex && !element.getCaseSensitive()) {
+      // Inspect every two-character class, not only the first one: in "[ab]x[Cc]" the
+      // redundant class is the second match.
+      final Matcher classMatcher = CASE_REGEX.matcher(text);
+      while (classMatcher.find()) {
+        final String letter1 = classMatcher.group(1);
+        final String letter2 = classMatcher.group(2);
+        if (!letter1.equals(letter2)
+            && letter1.toLowerCase().equals(letter2.toLowerCase())) {
+          System.err.println(rulePrefix + " contains regexp part [" + letter1 + letter2
+              + "] which is useless without case_sensitive=\"yes\".");
+        }
+      }
+    }
+
+    if (markedAsRegex && text.contains("|")) {
+      // Textual approximation, not a regex parse: the text is cut at every ')' and each piece
+      // is split at '|'.
+      final boolean caseSensitive = element.getCaseSensitive();
+      for (final String group : text.split("\\)")) {
+        final Set<String> seenExact = new HashSet<String>();
+        final Set<String> seenFolded = new HashSet<String>();
+        for (final String part : group.split("\\|")) {
+          final String folded = caseSensitive ? part : part.toLowerCase();
+          // add() returns false when the value was already present
+          final boolean exactDuplicate = !seenExact.add(part);
+          final boolean foldedDuplicate = !seenFolded.add(folded);
+          if (!foldedDuplicate) {
+            continue;
+          }
+          if (exactDuplicate) {
+            // Duplicate disjunction parts "foo|foo".
+            System.err.println("The " + lang.toString() + " rule : "
+                + ruleId + " contains duplicated disjunction part ("
+                + part + ") within the element " + quotedElement + ".");
+          } else {
+            // Duplicate disjunction parts "Foo|foo" since element ignores case.
+            System.err.println("The " + lang.toString() + " rule : "
+                + ruleId + " contains duplicated non case sensitive disjunction part ("
+                + part + ") within the element " + quotedElement + ". Did you "
+                + "forget case_sensitive=\"yes\"?");
+          }
+        }
+      }
+    }
+  }
+
+
+  /**
+   * Checks every rule against the examples that accompany it.
+   * <p>
+   * Each correct example must produce no match. Each incorrect example must contain both
+   * {@code <marker>} and {@code </marker>} and must produce exactly one match whose positions
+   * equal the marked span. If the example lists corrections, they must equal the replacements
+   * suggested by the match, and inserting any suggested replacement must give a sentence the
+   * rule no longer matches.
+   * <p>
+   * Rules for which {@code isWithComplexPhrase()} is true (per the original comment, "multiple
+   * rules created with complex phrases") are treated leniently: such a rule may fail to match
+   * an incorrect example as long as another rule with the same id matches it. Rules left
+   * without any match are switched with {@code notComplexPhrase()}, given an explanatory
+   * message and run through this method again; presumably they then take the strict path and
+   * fail with that message.
+   * <p>
+   * NOTE(review): the expected positions are taken from the raw example while matching runs on
+   * the tag-free text, so they look correct only when {@code <marker>} is the first tag in
+   * the example.
+   *
+   * @param rules rules to check
+   * @param languageTool tool used to analyze the example sentences
+   * @param lang language of the rules, used in failure messages
+   * @throws IOException propagated from sentence analysis or rule matching
+   */
+  private void testGrammarRulesFromXML(final List<PatternRule> rules,
+      final JLanguageTool languageTool, final Language lang) throws IOException {
+    // Key: rule id + bad sentence. Value: a complex-phrase rule that did not match that
+    // sentence, or null as soon as some rule with that id did match it.
+    final HashMap<String, PatternRule> complexRules = new HashMap<String, PatternRule>();
+    for (final PatternRule rule : rules) {
+      for (String goodSentence : rule.getCorrectExamples()) {
+        // examples may be indented in the XML, so newlines and tabs are dropped
+        goodSentence = cleanXML(goodSentence.replaceAll("[\\n\\t]+", ""));
+        assertTrue(goodSentence.trim().length() > 0);
+        assertFalse(lang + ": Did not expect error in: " + goodSentence
+            + " (Rule: " + rule + ")", match(rule, goodSentence, languageTool));
+      }
+      for (final IncorrectExample origBadExample : rule.getIncorrectExamples()) {
+        // examples may be indented in the XML, so newlines and tabs are dropped
+        final String origBadSentence = origBadExample.getExample().replaceAll(
+            "[\\n\\t]+", "");
+        final List<String> suggestedCorrection = origBadExample.getCorrections();
+        // Test the raw indexOf results: subtracting the tag length first would turn a missing
+        // "</marker>" (-1) into -9 and let it slip past the check.
+        final int openingMarker = origBadSentence.indexOf("<marker>");
+        final int closingMarker = origBadSentence.indexOf("</marker>");
+        if (openingMarker == -1 || closingMarker == -1) {
+          fail(lang
+              + ": No error position markup ('<marker>...</marker>') in bad example in rule "
+              + rule);
+        }
+        final int expectedMatchStart = openingMarker;
+        // the opening tag is removed by cleanXML, which shifts the end position left
+        final int expectedMatchEnd = closingMarker - "<marker>".length();
+        final String badSentence = cleanXML(origBadSentence);
+        assertTrue(badSentence.trim().length() > 0);
+        final RuleMatch[] matches = getMatches(rule, badSentence, languageTool);
+        if (!rule.isWithComplexPhrase()) {
+          assertTrue(lang + ": Did expect one error in: \"" + badSentence
+              + "\" (Rule: " + rule + "), got " + matches.length
+              + ". Additional info:" + rule.getMessage(), matches.length == 1);
+          final RuleMatch found = matches[0];
+          assertMatchPosition(lang, rule, found, expectedMatchStart, expectedMatchEnd);
+          // make sure suggestion is what we expect it to be
+          if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+            assertTrue("You specified a correction but your message has no suggestions in rule " + rule,
+                rule.getMessage().contains("<suggestion>"));
+            assertSuggestions(lang, rule, suggestedCorrection, found);
+          }
+          // make sure the suggested correction doesn't produce an error:
+          final int fromPos = found.getFromPos();
+          final int toPos = found.getToPos();
+          for (final String repl : found.getSuggestedReplacements()) {
+            final String fixedSentence = badSentence.substring(0, fromPos)
+                + repl + badSentence.substring(toPos);
+            final RuleMatch[] fixedMatches = getMatches(rule, fixedSentence, languageTool);
+            if (fixedMatches.length > 0) {
+              fail("Incorrect input:\n"
+                  + " " + badSentence
+                  + "\nCorrected sentence:\n"
+                  + " " + fixedSentence
+                  + "\nBy Rule:\n"
+                  + " " + rule
+                  + "\nThe correction triggered an error itself:\n"
+                  + " " + fixedMatches[0] + "\n");
+            }
+          }
+        } else { // for multiple rules created with complex phrases
+          final String key = rule.getId() + badSentence;
+          if (matches.length == 0) {
+            // remember the rule only while no rule with this id has matched the sentence
+            if (!complexRules.containsKey(key)) {
+              complexRules.put(key, rule);
+            }
+          } else {
+            complexRules.put(key, null);
+            assertTrue(lang + ": Did expect one error in: \"" + badSentence
+                + "\" (Rule: " + rule + "), got " + matches.length,
+                matches.length == 1);
+            final RuleMatch found = matches[0];
+            assertMatchPosition(lang, rule, found, expectedMatchStart, expectedMatchEnd);
+            // make sure suggestion is what we expect it to be
+            if (suggestedCorrection != null && suggestedCorrection.size() > 0) {
+              assertSuggestions(lang, rule, suggestedCorrection, found);
+            }
+            // make sure the suggested correction doesn't produce an error:
+            final int fromPos = found.getFromPos();
+            final int toPos = found.getToPos();
+            for (final String repl : found.getSuggestedReplacements()) {
+              final String fixedSentence = badSentence.substring(0, fromPos)
+                  + repl + badSentence.substring(toPos);
+              assertEquals("Corrected sentence for rule " + rule
+                  + " triggered error: " + fixedSentence, 0,
+                  getMatches(rule, fixedSentence, languageTool).length);
+            }
+          }
+        }
+      }
+    }
+    final List<PatternRule> badRules = new ArrayList<PatternRule>();
+    for (final PatternRule badRule : complexRules.values()) {
+      if (badRule != null) {
+        badRule.notComplexPhrase();
+        badRule
+            .setMessage("The rule contains a phrase that never matched any incorrect example.");
+        badRules.add(badRule);
+      }
+    }
+    if (!badRules.isEmpty()) {
+      testGrammarRulesFromXML(badRules, languageTool, lang);
+    }
+  }
+
+  /** Asserts that {@code match} starts and ends at the positions marked in the incorrect example. */
+  private void assertMatchPosition(final Language lang, final PatternRule rule,
+      final RuleMatch match, final int expectedStart, final int expectedEnd) {
+    assertEquals(lang
+        + ": Incorrect match position markup (start) for rule " + rule,
+        expectedStart, match.getFromPos());
+    assertEquals(lang
+        + ": Incorrect match position markup (end) for rule " + rule,
+        expectedEnd, match.getToPos());
+  }
+
+  /** Asserts that the corrections listed in the example equal the replacements suggested by {@code match}. */
+  private void assertSuggestions(final Language lang, final PatternRule rule,
+      final List<String> expected, final RuleMatch match) {
+    assertTrue(lang + ": Incorrect suggestions: "
+        + expected.toString() + " != "
+        + match.getSuggestedReplacements() + " for rule " + rule,
+        expected.equals(match.getSuggestedReplacements()));
+  }
+
+  /**
+   * Strips markup from an example sentence: every span that starts with {@code <}, continues
+   * with a character other than {@code <} and runs to the nearest following {@code >} is
+   * removed.
+   *
+   * @param str text that may contain tags such as {@code <marker>}
+   * @return {@code str} with those spans removed
+   */
+  protected String cleanXML(final String str) {
+    final Matcher tags = Pattern.compile("<([^<].*?)>").matcher(str);
+    return tags.replaceAll("");
+  }
+
+  /**
+   * Tells whether {@code rule} produces at least one match on {@code sentence}.
+   *
+   * @param rule rule to apply
+   * @param sentence plain text of a single sentence
+   * @param languageTool tool used to analyze the sentence
+   * @return {@code true} if the rule reports one or more matches
+   * @throws IOException propagated from analysis or matching
+   */
+  private boolean match(final Rule rule, final String sentence,
+      final JLanguageTool languageTool) throws IOException {
+    final AnalyzedSentence analyzed = languageTool.getAnalyzedSentence(sentence);
+    return rule.match(analyzed).length > 0;
+  }
+
+  /**
+   * Analyzes {@code sentence} and returns everything {@code rule} reports for it.
+   *
+   * @param rule rule to apply
+   * @param sentence plain text of a single sentence
+   * @param languageTool tool used to analyze the sentence
+   * @return the matches reported by the rule
+   * @throws IOException propagated from analysis or matching
+   */
+  private RuleMatch[] getMatches(final Rule rule, final String sentence,
+      final JLanguageTool languageTool) throws IOException {
+    // When debugging, print each returned match to System.err here.
+    return rule.match(languageTool.getAnalyzedSentence(sentence));
+  }
+
+  /**
+   * Checks an English sentence that begins with "Were are": exactly one match is expected and
+   * its two suggestions must be the capitalized "Where" and "We", in that order.
+   *
+   * @throws IOException if the tool cannot be created or the check fails
+   */
+  public void testUppercasingSuggestion() throws IOException {
+    final JLanguageTool englishTool = new JLanguageTool(Language.ENGLISH);
+    englishTool.activateDefaultPatternRules();
+    final List<RuleMatch> found = englishTool.check("Were are in the process of ...");
+    assertEquals(1, found.size());
+    final List<String> suggestions = found.get(0).getSuggestedReplacements();
+    assertEquals(2, suggestions.size());
+    final String[] expected = { "Where", "We" };
+    for (int i = 0; i < expected.length; i++) {
+      assertEquals(expected[i], suggestions.get(i));
+    }
+  }
+
+  /**
+   * Matches hand-built rules against short sentences: a single token, a two-token sequence,
+   * and regular-expression tokens with alternatives.
+   *
+   * @throws IOException propagated from sentence analysis or rule matching
+   */
+  public void testRule() throws IOException {
+    // single plain token
+    PatternRule pr = makePatternRule("one");
+    assertEquals(0, countMatches(pr, "A non-matching sentence."));
+    final RuleMatch[] single = pr.match(langTool
+        .getAnalyzedSentence("A matching sentence with one match."));
+    assertEquals(1, single.length);
+    final RuleMatch first = single[0];
+    assertEquals(25, first.getFromPos());
+    assertEquals(28, first.getToPos());
+    // column and line are not set if the rule is called standalone (not via JLanguageTool)
+    assertEquals(-1, first.getColumn());
+    assertEquals(-1, first.getLine());
+    assertEquals("ID1", first.getRule().getId());
+    assertTrue(first.getMessage().equals("user visible message"));
+    assertTrue(first.getShortMessage().equals("short comment"));
+    assertEquals(3, countMatches(pr, "one one and one: three matches"));
+
+    // two plain tokens that must be adjacent and in order
+    pr = makePatternRule("one two");
+    assertEquals(0, countMatches(pr, "this is one not two"));
+    assertEquals(0, countMatches(pr, "this is two one"));
+    assertEquals(1, countMatches(pr, "this is one two three"));
+    assertEquals(1, countMatches(pr, "one two"));
+
+    // alternatives in the first token
+    pr = makePatternRule("one|foo|xxxx two", false, true);
+    assertEquals(0, countMatches(pr, "one foo three"));
+    assertEquals(1, countMatches(pr, "one two"));
+    assertEquals(1, countMatches(pr, "foo two"));
+    assertEquals(1, countMatches(pr, "one foo two"));
+    assertEquals(1, countMatches(pr, "y x z one two blah foo"));
+
+    // alternatives in both tokens
+    pr = makePatternRule("one|foo|xxxx two|yyy", false, true);
+    assertEquals(0, countMatches(pr, "one, yyy"));
+    assertEquals(1, countMatches(pr, "one yyy"));
+    assertEquals(1, countMatches(pr, "xxxx two"));
+    assertEquals(1, countMatches(pr, "xxxx yyy"));
+  }
+
+  /** Returns how many matches {@code rule} reports for {@code sentence}, analyzed with the shared English tool. */
+  private int countMatches(final PatternRule rule, final String sentence) throws IOException {
+    return rule.match(langTool.getAnalyzedSentence(sentence)).length;
+  }
+
+  /**
+   * Builds a test rule from space-separated tokens, with case sensitivity and regular
+   * expressions both switched off.
+   *
+   * @param s space-separated token texts
+   * @return the rule built by the three-argument overload
+   */
+  private PatternRule makePatternRule(final String s) {
+    final boolean caseSensitive = false;
+    final boolean regex = false;
+    return makePatternRule(s, caseSensitive, regex);
+  }
+
+  /**
+   * Builds an English test rule with id "ID1" from space-separated tokens. Each token becomes
+   * one {@code Element} carrying the token text, except the token {@code SENT_START}: that one
+   * becomes an element with empty text on which {@code setPosElement("SENT_START", false, false)}
+   * is called.
+   *
+   * @param s space-separated token texts
+   * @param caseSensitive passed to every element
+   * @param regex passed to every element
+   * @return the assembled rule
+   */
+  private PatternRule makePatternRule(final String s,
+      final boolean caseSensitive, final boolean regex) {
+    final List<Element> elements = new ArrayList<Element>();
+    for (final String token : s.split(" ")) {
+      final Element built;
+      if (token.equals("SENT_START")) {
+        built = new Element("", caseSensitive, regex, false);
+        built.setPosElement(token, false, false);
+      } else {
+        built = new Element(token, caseSensitive, regex, false);
+      }
+      elements.add(built);
+    }
+    return new PatternRule("ID1", Language.ENGLISH, elements,
+        "test rule", "user visible message", "short comment");
+  }
+
+  /**
+   * A rule built from "SENT_START One" must match "One" as the first word of a sentence and
+   * must not match it in a later position.
+   *
+   * @throws IOException propagated from sentence analysis or rule matching
+   */
+  public void testSentenceStart() throws IOException {
+    final PatternRule sentenceStartRule = makePatternRule("SENT_START One");
+
+    final RuleMatch[] notAtStart = sentenceStartRule.match(
+        langTool.getAnalyzedSentence("Not One word."));
+    assertEquals(0, notAtStart.length);
+
+    final RuleMatch[] atStart = sentenceStartRule.match(
+        langTool.getAnalyzedSentence("One word."));
+    assertEquals(1, atStart.length);
+  }
+
+  /**
+   * Calls {@code PatternRule}'s {@code formatMultipleSynthesis} by name through
+   * {@code TestTools.callStringStaticMethod}, passing a {@code String[]} and two strings.
+   *
+   * @param suggs suggestions passed as the first argument
+   * @param left text passed as the second argument
+   * @param right text passed as the third argument
+   * @return the string returned by the invoked method
+   */
+  private static String callFormatMultipleSynthesis(final String[] suggs,
+      final String left, final String right) throws IllegalArgumentException,
+      SecurityException, InvocationTargetException, IllegalAccessException,
+      NoSuchMethodException {
+    return TestTools.callStringStaticMethod(PatternRule.class,
+        "formatMultipleSynthesis",
+        new Class[] { String[].class, String.class, String.class },
+        new Object[] { suggs, left, right });
+  }
+
+  /**
+   * Covers the non-public {@code formatMultipleSynthesis} as well: every suggestion must be
+   * wrapped in the given left and right text, with the results joined by ", ".
+   */
+  public void testformatMultipleSynthesis() throws IllegalArgumentException,
+      SecurityException, InvocationTargetException, IllegalAccessException,
+      NoSuchMethodException {
+    final String left = "This is how you should write: <suggestion>";
+    final String right = "</suggestion>.";
+
+    final String twoPhrases = callFormatMultipleSynthesis(
+        new String[] { "blah blah", "foo bar" }, left, right);
+    assertEquals(
+        "This is how you should write: <suggestion>blah blah</suggestion>, <suggestion>foo bar</suggestion>.",
+        twoPhrases);
+
+    // a whitespace-only suggestion must be kept as it is
+    final String wordAndBlank = callFormatMultipleSynthesis(
+        new String[] { "test", " " }, left, right);
+    assertEquals(
+        "This is how you should write: <suggestion>test</suggestion>, <suggestion> </suggestion>.",
+        wordAndBlank);
+  }
+
+  /**
+   * Command-line entry point for rule authors who are not programmers: runs the XML pattern
+   * tests for all languages with progress output.
+   *
+   * @param args not used
+   * @throws IOException if loading or running the rules fails
+   */
+  public static void main(final String[] args) throws IOException {
+    final PatternRuleTest test = new PatternRuleTest();
+    System.out.println("Running XML pattern tests...");
+    test.setUp();
+    // Add a language here to skip it, e.g. Language.CZECH while it has no XML rules yet.
+    final Set<Language> skipped = new HashSet<Language>();
+    test.testGrammarRulesFromXML(skipped, true);
+    System.out.println("Tests successful.");
+  }
+
+}