summary | refs | log | tree | commit | diff | stats
path: root/JLanguageTool/src/java/de/danielnaber/languagetool/tools
diff options
context:
space:
mode:
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/tools')
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore | 1
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java | 232
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java | 581
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java | 37
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java | 626
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java | 127
6 files changed, 1604 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore
new file mode 100644
index 0000000..b71c741
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/.cvsignore
@@ -0,0 +1 @@
+EnglishTaggerExtract.java
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java
new file mode 100644
index 0000000..9735cac
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/ReflectionUtils.java
@@ -0,0 +1,232 @@
+/* ReflectionUtils, helper methods to load classes dynamically
+ * Copyright (C) 2007 Andriy Rysin, Marcin Milkowski, Daniel Naber
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Modifier;
+import java.net.JarURLConnection;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.*;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+public final class ReflectionUtils {
+
+ private ReflectionUtils() {
+ // static utility class, not meant to be instantiated
+ }
+
+ /**
+ * @param classLoader
+ * Classloader to use for loading classes
+ * @param packageName
+ * Package name to check classes in
+ * @param classNameRegEx
+ * If not null, limit class names to this regexp. This parameter is
+ * checked before a class is loaded, so use it to improve performance by
+ * skipping loading extra classes
+ * @param subdirLevel
+ * If more than 0, all subdirectories/subpackages up to
+ * <code>subdirLevel</code> will be traversed. This parameter is checked
+ * before a class is loaded - use it to improve performance by skipping
+ * loading extra classes
+ * @param classExtends
+ * If not null return only classes which extend this class
+ * @param interfaceImplements
+ * If not null return only classes which implement this interface
+ * @return all matching, non-abstract classes found inside the given package
+ * @throws ClassNotFoundException if scanning the package or loading a class fails
+ */
+ public static Class[] findClasses(final ClassLoader classLoader,
+ final String packageName, final String classNameRegEx,
+ final int subdirLevel, final Class classExtends,
+ final Class interfaceImplements) throws ClassNotFoundException {
+ final Map<Class,String> foundClasses = new HashMap<Class,String>();
+
+ try {
+ final String packagePath = packageName.replace('.', '/');
+ final Enumeration<URL> resources_ = classLoader.getResources(packagePath);
+
+ final Set<URI> uniqResources = new HashSet<URI>();
+ while (resources_.hasMoreElements()) {
+ final URI resource = resources_.nextElement().toURI();
+ uniqResources.add(resource);
+ }
+
+ for (final URI res : uniqResources) {
+ final URL resource = res.toURL();
+ // System.err.println("trying resource: " + resource);
+ // jars and directories are treated differently
+ if (resource.getProtocol().startsWith("jar")) {
+ findClassesInJar(packageName, classNameRegEx, subdirLevel,
+ classExtends, interfaceImplements, foundClasses, resource);
+ } else {
+ findClassesInDirectory(classLoader, packageName, classNameRegEx,
+ subdirLevel, classExtends, interfaceImplements, foundClasses,
+ resource);
+ }
+ }
+ } catch (final Exception ex) {
+ throw new ClassNotFoundException("Loading rules failed: "
+ + ex.getMessage(), ex);
+ }
+
+ return foundClasses.keySet().toArray(new Class[foundClasses.size()]);
+ }
+
+ private static void findClassesInDirectory(final ClassLoader classLoader,
+ final String packageName, final String classNameRegEx,
+ final int subdirLevel, final Class classExtends,
+ final Class interfaceImplements, final Map<Class,String> foundClasses,
+ final URL resource) throws Exception {
+ final File directory = new File(resource.toURI());
+
+ if (!directory.exists() && !directory.isDirectory()) {
+ throw new Exception("directory does not exist: "
+ + directory.getAbsolutePath());
+ }
+
+ // read classes
+ for (final File file : directory.listFiles()) {
+ if (file.isFile() && file.getName().endsWith(".class")) {
+ final String classShortNm = file.getName().substring(0,
+ file.getName().lastIndexOf('.'));
+ if (classNameRegEx == null || classShortNm.matches(classNameRegEx)) {
+ final Class clazz = Class.forName(packageName + "." + classShortNm);
+
+ if (!isMaterial(clazz)) {
+ continue;
+ }
+
+ if (classExtends == null
+ || isExtending(clazz, classExtends.getName())
+ && interfaceImplements == null
+ || isImplementing(clazz, interfaceImplements)) {
+ foundClasses.put(clazz, file.getAbsolutePath());
+ // System.err.println("Added rule from dir: " + classShortNm);
+ }
+ }
+ }
+ }
+
+ // then subdirectories if we're traversing
+ if (subdirLevel > 0) {
+ for (final File dir : directory.listFiles()) {
+ if (dir.isDirectory()) {
+ final Class[] subLevelClasses = findClasses(classLoader, packageName
+ + "." + dir.getName(), classNameRegEx, subdirLevel - 1,
+ classExtends, interfaceImplements);
+ for (Class tmpClass : subLevelClasses) {
+ foundClasses.put(tmpClass, "dir:" + dir.getAbsolutePath());
+ }
+ }
+ }
+ }
+ }
+
+ private static void findClassesInJar(final String packageName,
+ final String classNameRegEx, final int subdirLevel,
+ final Class classExtends, final Class interfaceImplements,
+ final Map<Class,String> foundClasses, final URL resource) throws IOException,
+ URISyntaxException, ClassNotFoundException {
+ final JarURLConnection conn = (JarURLConnection) resource.openConnection();
+ final JarFile currentFile = conn.getJarFile(); // new JarFile(new
+ // File(resource.toURI()));
+ // jars are flat containers:
+ for (final Enumeration<JarEntry> e = currentFile.entries(); e
+ .hasMoreElements();) {
+ final JarEntry current = e.nextElement();
+ final String name = current.getName();
+ // System.err.println("jar entry: " + name);
+
+ if (name.endsWith(".class")) {
+ final String classNm = name.replaceAll("/", ".").replace(".class", "");
+ final int pointIdx = classNm.lastIndexOf('.');
+ final String classShortNm = pointIdx == -1 ? classNm : classNm
+ .substring(pointIdx + 1);
+
+ if (classNm.startsWith(packageName)
+ && (classNameRegEx == null || classShortNm.matches(classNameRegEx))) {
+ final String subName = classNm.substring(packageName.length() + 1);
+
+ if (countOccurrences(subName, '.') > subdirLevel) {
+ continue;
+ }
+
+ final Class clazz = Class.forName(classNm);
+ if (foundClasses.containsKey(clazz)) {
+ throw new RuntimeException("Duplicate class definition:\n"
+ + clazz.getName() + ", found in\n" + currentFile.getName() + " and\n"
+ + foundClasses.get(clazz));
+ }
+
+ if (!isMaterial(clazz)) {
+ continue;
+ }
+
+ if (classExtends == null
+ || isExtending(clazz, classExtends.getName())
+ && interfaceImplements == null
+ || isImplementing(clazz, interfaceImplements)) {
+ foundClasses.put(clazz, currentFile.getName());
+ // System.err.println("Added class from jar: " + name);
+ }
+ }
+ }
+ }
+ }
+
+ private static int countOccurrences(final String str, final char ch) {
+ int i = 0;
+ int pos = str.indexOf(ch, 0);
+ while (pos != -1) {
+ i++;
+ pos = str.indexOf(ch, pos + 1);
+ }
+ return i;
+ }
+
+ private static boolean isMaterial(final Class clazz) {
+ final int mod = clazz.getModifiers();
+ return !Modifier.isAbstract(mod) && !Modifier.isInterface(mod);
+ }
+
+ /**
+ * @return Returns true if clazz extends superClassName
+ */
+ private static boolean isExtending(final Class clazz,
+ final String superClassName) {
+ Class tmpSuperClass = clazz.getSuperclass();
+ while (tmpSuperClass != null) {
+ if (superClassName.equals(tmpSuperClass.getName())) {
+ return true;
+ }
+ tmpSuperClass = tmpSuperClass.getSuperclass();
+ }
+ return false;
+ }
+
+ private static boolean isImplementing(final Class clazz, final Class interfaze) {
+ return Arrays.asList(clazz.getInterfaces()).contains(interfaze);
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java
new file mode 100644
index 0000000..af266f3
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/StringTools.java
@@ -0,0 +1,581 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tools;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.regex.Pattern;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.gui.Tools;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+
+/**
+ * Tools for reading files and working with strings (case checks, XML escaping, etc.).
+ *
+ * @author Daniel Naber
+ */
+public final class StringTools {
+
+ private static final int DEFAULT_CONTEXT_SIZE = 25;
+
+ /**
+ * Constants for printing XML rule matches.
+ */
+ public static enum XmlPrintMode {
+ /**
+ * Normally output the rule matches by starting and
+ * ending the XML output on every call.
+ */
+ NORMAL_XML,
+ /**
+ * Start XML output by printing the preamble and the
+ * start of the root element.
+ */
+ START_XML,
+ /**
+ * End XML output by closing the root element.
+ */
+ END_XML,
+ /**
+ * Simply continue rule match output.
+ */
+ CONTINUE_XML
+ }
+
+ private static final Pattern XML_COMMENT_PATTERN = Pattern.compile("<!--.*?-->", Pattern.DOTALL);
+ private static final Pattern XML_PATTERN = Pattern.compile("(?<!<)<[^<>]+>", Pattern.DOTALL);
+
+
+ private StringTools() {
+ // only static stuff
+ }
+
+ /**
+ * Throw exception if the given string is null or empty or only whitespace.
+ */
+ public static void assureSet(final String s, final String varName) {
+ if (s == null) {
+ throw new NullPointerException(varName + " cannot be null");
+ }
+ if (isEmpty(s.trim())) {
+ throw new IllegalArgumentException(varName
+ + " cannot be empty or whitespace only");
+ }
+ }
+
+ /**
+ * Read a file's content.
+ */
+ public static String readFile(final InputStream file) throws IOException {
+ return readFile(file, null);
+ }
+
+ /**
+ * Read the text file using the given encoding.
+ *
+ * @param file
+ * InputStream to a file to be read
+ * @param encoding
+ * the file's character encoding (e.g. <code>iso-8859-1</code>)
+ * @return a string with the file's content, lines separated by
+ * <code>\n</code>
+ * @throws IOException
+ */
+ public static String readFile(final InputStream file, final String encoding)
+ throws IOException {
+ InputStreamReader isr = null;
+ BufferedReader br = null;
+ final StringBuilder sb = new StringBuilder();
+ try {
+ if (encoding == null) {
+ isr = new InputStreamReader(file);
+ } else {
+ isr = new InputStreamReader(file, encoding);
+ }
+ br = new BufferedReader(isr);
+ String line;
+ while ((line = br.readLine()) != null) {
+ sb.append(line);
+ sb.append('\n');
+ }
+ } finally {
+ if (br != null) {
+ br.close();
+ }
+ if (isr != null) {
+ isr.close();
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Returns true if <code>str</code> is made up of all-uppercase characters
+ * (ignoring characters for which no upper-/lowercase distinction exists).
+ */
+ public static boolean isAllUppercase(final String str) {
+ return str.equals(str.toUpperCase());
+ }
+
+ /**
+ * Returns true if str is mixed case, i.e. not all-uppercase, not capitalized and not all-lowercase.
+ * @param str input string
+ */
+ public static boolean isMixedCase(final String str) {
+ return !isAllUppercase(str)
+ && !isCapitalizedWord(str)
+ && !str.equals(str.toLowerCase());
+ }
+
+ /**
+ * @param str - input string
+ */
+ public static boolean isCapitalizedWord(final String str) {
+ if (isEmpty(str)) {
+ return false;
+ }
+ final char firstChar = str.charAt(0);
+ if (Character.isUpperCase(firstChar)) {
+ return str.substring(1).equals(str.substring(1).toLowerCase());
+ }
+ return false;
+ }
+
+ /**
+ * Whether the first character of <code>str</code> is an uppercase character.
+ */
+ public static boolean startsWithUppercase(final String str) {
+ if (isEmpty(str)) {
+ return false;
+ }
+ final char firstChar = str.charAt(0);
+ if (Character.isUpperCase(firstChar)) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Return <code>str</code> modified so that its first character is now an
+ * uppercase character. If <code>str</code> starts with characters that are
+ * neither letters nor digits, such as quotes or parentheses, the first
+ * character is determined as the first letter or digit.
+ */
+ public static String uppercaseFirstChar(final String str) {
+ return changeFirstCharCase(str, true);
+ }
+
+ /**
+ * Return <code>str</code> modified so that its first character is now a
+ * lowercase character. If <code>str</code> starts with characters that are
+ * neither letters nor digits, such as quotes or parentheses, the first
+ * character is determined as the first letter or digit.
+ */
+ public static String lowercaseFirstChar(final String str) {
+ return changeFirstCharCase(str, false);
+ }
+
+ /**
+ * Return <code>str</code> modified so that its first character is now a
+ * lowercase or uppercase character, depending on <code>toUpperCase</code>.
+ * If <code>str</code> starts with characters that are neither letters nor
+ * digits, such as quotes or parentheses, the first character is
+ * determined as the first letter or digit.
+ */
+ private static String changeFirstCharCase(final String str, final boolean toUpperCase) {
+ if (isEmpty(str)) {
+ return str;
+ }
+ if (str.length() == 1) {
+ return toUpperCase ? str.toUpperCase() : str.toLowerCase();
+ }
+ int pos = 0;
+ final int len = str.length() - 1;
+ while (!Character.isLetterOrDigit(str.charAt(pos)) && len > pos) {
+ pos++;
+ }
+ final char firstChar = str.charAt(pos);
+ return str.substring(0, pos)
+ + (toUpperCase ? Character.toUpperCase(firstChar) : Character.toLowerCase(firstChar))
+ + str.substring(pos + 1);
+ }
+
+ public static String readerToString(final Reader reader) throws IOException {
+ final StringBuilder sb = new StringBuilder();
+ int readBytes = 0;
+ final char[] chars = new char[4000];
+ while (readBytes >= 0) {
+ readBytes = reader.read(chars, 0, 4000);
+ if (readBytes <= 0) {
+ break;
+ }
+ sb.append(new String(chars, 0, readBytes));
+ }
+ return sb.toString();
+ }
+
+ public static String streamToString(final InputStream is) throws IOException {
+ final InputStreamReader isr = new InputStreamReader(is);
+ try {
+ return readerToString(isr);
+ } finally {
+ isr.close();
+ }
+ }
+
+ /**
+ * Calls escapeHTML(String).
+ */
+ public static String escapeXML(final String s) {
+ return escapeHTML(s);
+ }
+
+ /**
+ * Escapes these characters: less than, greater than, double quote, ampersand.
+ */
+ public static String escapeHTML(final String s) {
+ // this version is much faster than using s.replaceAll
+ final StringBuilder sb = new StringBuilder();
+ final int n = s.length();
+ for (int i = 0; i < n; i++) {
+ final char c = s.charAt(i);
+ switch (c) {
+ case '<':
+ sb.append("&lt;");
+ break;
+ case '>':
+ sb.append("&gt;");
+ break;
+ case '&':
+ sb.append("&amp;");
+ break;
+ case '"':
+ sb.append("&quot;");
+ break;
+
+ default:
+ sb.append(c);
+ break;
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Get an XML representation of the given rule matches.
+ *
+ * @param text
+ * the original text that was checked, used to get the context of the
+ * matches
+ * @param contextSize
+ * the desired context size in characters
+ * @deprecated Use {@link #ruleMatchesToXML(List,String,int,XmlPrintMode)} instead
+ */
+ public static String ruleMatchesToXML(final List<RuleMatch> ruleMatches,
+ final String text, final int contextSize) {
+ return ruleMatchesToXML(ruleMatches, text, contextSize, XmlPrintMode.NORMAL_XML);
+ }
+
+ /**
+ * Get an XML representation of the given rule matches.
+ * @param text
+ * the original text that was checked, used to get the context of the
+ * matches
+ * @param contextSize
+ * the desired context size in characters
+ * @param xmlMode how to print the XML
+ */
+ public static String ruleMatchesToXML(final List<RuleMatch> ruleMatches,
+ final String text, final int contextSize, final XmlPrintMode xmlMode) {
+ //
+ // IMPORTANT: people rely on this format, don't change it!
+ //
+ final StringBuilder xml = new StringBuilder();
+
+ if (xmlMode == XmlPrintMode.NORMAL_XML || xmlMode == XmlPrintMode.START_XML) {
+ xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+ xml.append("<matches>\n");
+ }
+
+ for (final RuleMatch match : ruleMatches) {
+ String subId = "";
+ if (match.getRule() instanceof PatternRule) {
+ final PatternRule pRule = (PatternRule) match.getRule();
+ if (pRule.getSubId() != null) {
+ subId = " subId=\"" + escapeXMLForAPIOutput(pRule.getSubId()) + "\" ";
+ }
+ }
+ xml.append("<error" + " fromy=\"" + match.getLine() + "\"" + " fromx=\""
+ + (match.getColumn() - 1) + "\"" + " toy=\"" + match.getEndLine() + "\""
+ + " tox=\"" + (match.getEndColumn() - 1) + "\"" + " ruleId=\""
+ + match.getRule().getId() + "\"");
+ final String msg = match.getMessage().replaceAll("</?suggestion>", "'");
+ xml.append(subId);
+ xml.append(" msg=\"" + escapeXMLForAPIOutput(msg) + "\"");
+ final String START_MARKER = "__languagetool_start_marker";
+ String context = Tools.getContext(match.getFromPos(), match.getToPos(),
+ text, contextSize, START_MARKER, "", true);
+ xml.append(" replacements=\""
+ + escapeXMLForAPIOutput(listToString(
+ match.getSuggestedReplacements(), "#")) + "\"");
+ // get position of error in context and remove artificial marker again:
+ final int contextOffset = context.indexOf(START_MARKER);
+ context = context.replaceFirst(START_MARKER, "");
+ context = context.replaceAll("[\n\r]", " ");
+ xml.append(" context=\"" + context + "\"");
+ xml.append(" contextoffset=\"" + contextOffset + "\"");
+ xml.append(" errorlength=\"" + (match.getToPos() - match.getFromPos())
+ + "\"");
+ xml.append("/>\n");
+ }
+ if (xmlMode == XmlPrintMode.END_XML || xmlMode == XmlPrintMode.NORMAL_XML) {
+ xml.append("</matches>\n");
+ }
+ return xml.toString();
+ }
+
+ private static String escapeXMLForAPIOutput(final String s) {
+ // this is simplified XML, i.e. put the "<error>" in one line:
+ return escapeXML(s).replaceAll("[\n\r]", " ");
+ }
+
+ public static String listToString(final Collection<String> l, final String delimiter) {
+ final StringBuilder sb = new StringBuilder();
+ for (final Iterator<String> iter = l.iterator(); iter.hasNext();) {
+ final String str = iter.next();
+ sb.append(str);
+ if (iter.hasNext()) {
+ sb.append(delimiter);
+ }
+ }
+ return sb.toString();
+ }
+
+ public static String getContext(final int fromPos, final int toPos,
+ final String fileContents) {
+ return getContext(fromPos, toPos, fileContents, DEFAULT_CONTEXT_SIZE);
+ }
+
+ public static String getContext(final int fromPos, final int toPos,
+ final String contents, final int contextSize) {
+ final String fileContents = contents.replace('\n', ' ');
+ // calculate context region:
+ int startContent = fromPos - contextSize;
+ String prefix = "...";
+ String postfix = "...";
+ String markerPrefix = " ";
+ if (startContent < 0) {
+ prefix = "";
+ markerPrefix = "";
+ startContent = 0;
+ }
+ int endContent = toPos + contextSize;
+ if (endContent > fileContents.length()) {
+ postfix = "";
+ endContent = fileContents.length();
+ }
+ // make "^" marker. inefficient but robust implementation:
+ final StringBuilder marker = new StringBuilder();
+ for (int i = 0; i < fileContents.length() + prefix.length(); i++) {
+ if (i >= fromPos && i < toPos) {
+ marker.append('^');
+ } else {
+ marker.append(' ');
+ }
+ }
+ // now build context string plus marker:
+ final StringBuilder sb = new StringBuilder();
+ sb.append(prefix);
+ sb.append(fileContents.substring(startContent, endContent));
+ sb.append(postfix);
+ sb.append('\n');
+ sb.append(markerPrefix);
+ sb.append(marker.substring(startContent, endContent));
+ return sb.toString();
+ }
+
+ /**
+ * Filters out space, tab and newline characters. Useful for trimming the contents of
+ * token elements that cannot possibly contain any spaces.
+ *
+ * @param str
+ * String to be filtered.
+ * @return Filtered string.
+ */
+ public static String trimWhitespace(final String str) {
+ final StringBuilder filter = new StringBuilder();
+ for (int i = 0; i < str.length(); i++) {
+ final char c = str.charAt(i);
+ if (c != '\n' && c != ' ' && c != '\t') {
+ filter.append(c);
+ }
+ }
+ return filter.toString();
+ }
+
+ /**
+ * Adds spaces before words that are not punctuation.
+ *
+ * @param word
+ * Word to add the preceding space.
+ * @param language
+ * Language of the word (to check typography conventions). Currently
+ * French convention of not adding spaces only before '.' and ',' is
+ * implemented; other languages assume that before ,.;:!? no spaces
+ * should be added.
+ * @return String containing a space or an empty string.
+ */
+ public static String addSpace(final String word, final Language language) {
+ String space = " ";
+ final int len = word.length();
+ if (len == 1) {
+ final char c = word.charAt(0);
+ if (Language.FRENCH.equals(language)) {
+ if (c == '.' || c == ',') {
+ space = "";
+ }
+ } else {
+ if (c == '.' || c == ',' || c == ';' || c == ':' || c == '?'
+ || c == '!') {
+ space = "";
+ }
+ }
+ }
+ return space;
+ }
+
+ /**
+ * Returns translation of the UI element without the control character "&". To
+ * have "&" in the UI, use "&&".
+ *
+ * @param label
+ * Label to convert.
+ * @return String UI element string without mnemonics.
+ */
+ public static String getLabel(final String label) {
+ return label.replaceAll("&([^&])", "$1").
+ replaceAll("&&", "&");
+ }
+
+ /**
+ * Returns the UI element string with mnemonics encoded in OpenOffice.org
+ * convention (using "~").
+ *
+ * @param label
+ * Label to convert
+ * @return String UI element with ~ replacing &.
+ */
+ public static String getOOoLabel(final String label) {
+ return label.replaceAll("&([^&])", "~$1").
+ replaceAll("&&", "&");
+ }
+
+ /**
+ * Returns mnemonic of a UI element.
+ *
+ * @param label
+ * String Label of the UI element
+ * @return Mnemonic character of the UI element, or the NUL character if no
+ * mnemonic is set.
+ */
+ public static char getMnemonic(final String label) {
+ int mnemonicPos = label.indexOf('&');
+ while (mnemonicPos != -1 && mnemonicPos == label.indexOf("&&")
+ && mnemonicPos < label.length()) {
+ mnemonicPos = label.indexOf('&', mnemonicPos + 2);
+ }
+ if (mnemonicPos == -1 || mnemonicPos == label.length()) {
+ return '\u0000';
+ }
+ return label.charAt(mnemonicPos + 1);
+ }
+
+ /**
+ * Checks if a string contains only whitespace, including all Unicode
+ * whitespace.
+ *
+ * @param str
+ * String to check
+ * @return true if the string is whitespace-only.
+ */
+ public static boolean isWhitespace(final String str) {
+ if ("\u0002".equals(str) // unbreakable field, e.g. a footnote number in OOo
+ || "\u0001".equals(str)) { // breakable field in OOo
+ return false;
+ }
+ final String trimStr = str.trim();
+ if (isEmpty(trimStr)) {
+ return true;
+ }
+ if (trimStr.length() == 1) {
+ return java.lang.Character.isWhitespace(trimStr.charAt(0));
+ }
+ return false;
+ }
+
+ /**
+ *
+ * @param ch
+ * Character to check
+ * @return True if the character is a positive number (decimal digit from 1 to
+ * 9).
+ */
+ public static boolean isPositiveNumber(final char ch) {
+ return ch >= '1' && ch <= '9';
+ }
+
+ /**
+ * Helper method to replace calls to "".equals().
+ *
+ * @param str
+ * String to check
+ * @return true if string is empty OR null
+ */
+ public static boolean isEmpty(final String str) {
+ return str == null || str.length() == 0;
+ }
+
+ /**
+ * Simple XML filtering routine
+ * @param str XML string to be filtered.
+ * @return Filtered string without XML tags.
+ */
+ public static String filterXML(final String str) {
+ String s = str;
+ s = XML_COMMENT_PATTERN.matcher(s).replaceAll(" ");
+ s = XML_PATTERN.matcher(s).replaceAll("");
+ return s;
+ }
+
+ public static String asString(final CharSequence s) {
+ if (s == null) {
+ return null;
+ }
+ return s.toString();
+ }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java
new file mode 100644
index 0000000..6a6432a
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/SymbolLocator.java
@@ -0,0 +1,37 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2010 Marcin Miłkowski (http://www.languagetool.org)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tools;
+
+/**
+ * Helper class for GenericUnpairedBracketsRule to identify
+ * symbols indexed with integers.
+ *
+ * @author Marcin Miłkowski
+ *
+ */
+public class SymbolLocator {
+ public String symbol;
+ public int index;
+
+ public SymbolLocator(final String symbol, final int index) {
+ this.symbol = symbol;
+ this.index = index;
+ }
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java
new file mode 100644
index 0000000..c5d1984
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/Tools.java
@@ -0,0 +1,626 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool.tools;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.lang.reflect.Constructor;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+import de.danielnaber.languagetool.AnalyzedSentence;
+import de.danielnaber.languagetool.JLanguageTool;
+import de.danielnaber.languagetool.Language;
+import de.danielnaber.languagetool.bitext.BitextReader;
+import de.danielnaber.languagetool.bitext.StringPair;
+import de.danielnaber.languagetool.rules.Rule;
+import de.danielnaber.languagetool.rules.RuleMatch;
+import de.danielnaber.languagetool.rules.bitext.BitextRule;
+import de.danielnaber.languagetool.rules.patterns.PatternRule;
+import de.danielnaber.languagetool.rules.patterns.bitext.BitextPatternRuleLoader;
+import de.danielnaber.languagetool.rules.patterns.bitext.FalseFriendsAsBitextLoader;
+import de.danielnaber.languagetool.tools.StringTools.XmlPrintMode;
+
+public final class Tools {
+
+ /**
+ * Context size used when checkText is called with a context size of -1,
+ * and for all bitext checks in this class.
+ */
+ private static final int DEFAULT_CONTEXT_SIZE = 45;
+
+ /** Not instantiable: this class only offers static helper methods. */
+ private Tools() {
+ // cannot construct, static methods only
+ }
+
+  /**
+   * Splits the text into sentences, analyzes each of them with the given
+   * LanguageTool instance and prints every analyzed sentence to System.out.
+   *
+   * @param contents
+   *          Text to tag.
+   * @param lt
+   *          LanguageTool instance used for sentence splitting and analysis
+   * @throws IOException
+   *           propagated from the LanguageTool analysis
+   */
+  public static void tagText(final String contents, final JLanguageTool lt)
+      throws IOException {
+    for (final String sentence : lt.sentenceTokenize(contents)) {
+      final AnalyzedSentence analyzed = lt.getAnalyzedSentence(sentence);
+      System.out.println(analyzed.toString());
+    }
+  }
+
+ /**
+ * Checks the text and prints the matches to System.out in the simple text
+ * format, using the default context size, no line offset and no previous
+ * matches.
+ *
+ * @param contents
+ * a text to check
+ * @param lt
+ * Initialized LanguageTool
+ * @return
+ * Number of rule matches to the input text.
+ * @throws IOException
+ */
+ public static int checkText(final String contents, final JLanguageTool lt)
+ throws IOException {
+ return checkText(contents, lt, false, -1, 0, 0, StringTools.XmlPrintMode.NORMAL_XML);
+ }
+
+ /**
+ * Checks the text and prints the matches to System.out in the simple text
+ * format, using the default context size.
+ *
+ * @param contents
+ * a text to check
+ * @param lt
+ * Initialized LanguageTool
+ * @param lineOffset
+ * line number offset to be added to line numbers in matches
+ * @return
+ * Number of rule matches to the input text.
+ * @throws IOException
+ */
+ public static int checkText(final String contents, final JLanguageTool lt, final int lineOffset)
+ throws IOException {
+ return checkText(contents, lt, false, -1, lineOffset, 0, StringTools.XmlPrintMode.NORMAL_XML);
+ }
+
+ /**
+ * Checks the text and prints the matches to System.out, using the default
+ * context size and the NORMAL_XML print mode.
+ *
+ * @param contents
+ * a text to check
+ * @param lt
+ * Initialized LanguageTool
+ * @param apiFormat
+ * whether to print the result in a simple XML format
+ * @param lineOffset
+ * line number offset to be added to line numbers in matches
+ * @return
+ * Number of rule matches to the input text.
+ * @throws IOException
+ */
+ public static int checkText(final String contents, final JLanguageTool lt,
+ final boolean apiFormat, final int lineOffset) throws IOException {
+ return checkText(contents, lt, apiFormat, -1, lineOffset, 0, StringTools.XmlPrintMode.NORMAL_XML);
+ }
+
+  /**
+   * Runs LanguageTool on the given text and prints the matches to System.out,
+   * either as XML or in the simple text format. Timing statistics are printed
+   * afterwards when the XML mode is NORMAL_XML.
+   *
+   * @param contents
+   *          a text to check (may be more than one sentence)
+   * @param lt
+   *          Initialized LanguageTool
+   * @param apiFormat
+   *          whether to print the result in a simple XML format
+   * @param contextSize
+   *          error text context size: -1 for default
+   * @param lineOffset
+   *          line number offset to be added to line numbers in matches
+   * @param prevMatches
+   *          number of previously matched rules
+   * @param xmlMode
+   *          mode of xml printout for simple xml output
+   * @return
+   *          Number of rule matches to the input text.
+   * @throws IOException
+   */
+  public static int checkText(final String contents, final JLanguageTool lt,
+      final boolean apiFormat, int contextSize, final int lineOffset,
+      final int prevMatches, final XmlPrintMode xmlMode) throws IOException {
+    final int effectiveContextSize =
+        contextSize == -1 ? DEFAULT_CONTEXT_SIZE : contextSize;
+    final long startTime = System.currentTimeMillis();
+    final List<RuleMatch> ruleMatches = lt.check(contents);
+    // shift the reported (end) line numbers by the caller-supplied offset
+    for (final RuleMatch match : ruleMatches) {
+      match.setLine(match.getLine() + lineOffset);
+      match.setEndLine(match.getEndLine() + lineOffset);
+    }
+    if (!apiFormat) {
+      printMatches(ruleMatches, prevMatches, contents, effectiveContextSize);
+    } else {
+      final String xml = StringTools.ruleMatchesToXML(ruleMatches, contents,
+          effectiveContextSize, xmlMode);
+      // wrap System.out so that the XML is written UTF-8 encoded
+      final PrintStream utf8Out = new PrintStream(System.out, true, "UTF-8");
+      utf8Out.print(xml);
+    }
+
+    // statistics are only shown when the output is not assembled piecewise
+    final boolean showStats = xmlMode == StringTools.XmlPrintMode.NORMAL_XML;
+    if (showStats) {
+      displayTimeStats(startTime, lt.getSentenceCount(), apiFormat);
+    }
+    return ruleMatches.size();
+  }
+
+  /**
+   * Prints the elapsed time and the sentence throughput to System.out.
+   * In API format the line is wrapped in an XML comment.
+   *
+   * @param startTime start of the measurement, as returned by System.currentTimeMillis()
+   * @param sentCount number of sentences processed
+   * @param apiFormat whether the surrounding output is XML
+   */
+  private static void displayTimeStats(final long startTime,
+      final long sentCount, final boolean apiFormat) {
+    final long elapsedMillis = System.currentTimeMillis() - startTime;
+    final float elapsedSeconds = elapsedMillis / 1000.0f;
+    final float sentencesPerSecond = sentCount / elapsedSeconds;
+    final String stats = String.format(Locale.ENGLISH,
+        "Time: %dms for %d sentences (%.1f sentences/sec)", elapsedMillis,
+        sentCount, sentencesPerSecond);
+    if (apiFormat) {
+      System.out.println("<!--");
+    }
+    System.out.println(stats);
+    if (apiFormat) {
+      System.out.println("-->");
+    }
+  }
+
+ /**
+ * Displays matches in a simple text format.
+ * @param ruleMatches Matches from rules.
+ * @param prevMatches Number of previously found matches.
+ * @param contents The text that was checked.
+ * @param contextSize The size of contents displayed.
+ * @since 1.0.1
+ */
+ private static void printMatches(final List<RuleMatch> ruleMatches,
+ final int prevMatches, final String contents, final int contextSize) {
+ int i = 1;
+ for (final RuleMatch match : ruleMatches) {
+ String output = i + prevMatches + ".) Line " + (match.getLine() + 1) + ", column "
+ + match.getColumn() + ", Rule ID: " + match.getRule().getId();
+ if (match.getRule() instanceof PatternRule) {
+ final PatternRule pRule = (PatternRule) match.getRule();
+ output += "[" + pRule.getSubId() + "]";
+ }
+ System.out.println(output);
+ String msg = match.getMessage();
+ msg = msg.replaceAll("<suggestion>", "'");
+ msg = msg.replaceAll("</suggestion>", "'");
+ System.out.println("Message: " + msg);
+ final List<String> replacements = match.getSuggestedReplacements();
+ if (!replacements.isEmpty()) {
+ System.out.println("Suggestion: "
+ + StringTools.listToString(replacements, "; "));
+ }
+ System.out.println(StringTools.getContext(match.getFromPos(), match
+ .getToPos(), contents, contextSize));
+ if (i < ruleMatches.size()) {
+ System.out.println();
+ }
+ i++;
+ }
+ }
+
+ /**
+ * Checks the bilingual input (bitext) and displays the output (considering the target
+ * language) in API format or in the simple text format.
+ *
+ * NOTE: the positions returned by the rule matches are relative
+ * to the target string only, and always start at the first line
+ * and first column, no matter how many lines were checked before.
+ * To have multiple lines taken into account, use the checkBitext
+ * method that takes a BitextReader.
+ *
+ * @param src Source text.
+ * @param trg Target text.
+ * @param srcLt Source JLanguageTool (used to analyze the text).
+ * @param trgLt Target JLanguageTool (used to analyze the text).
+ * @param bRules Bilingual rules used in addition to target standard rules.
+ * @param apiFormat Whether API format should be used.
+ * @param xmlMode The mode of XML output display.
+ * @return The number of rules matched on the bitext.
+ * @throws IOException
+ * @since 1.0.1
+ */
+ public static int checkBitext(final String src, final String trg,
+ final JLanguageTool srcLt, final JLanguageTool trgLt,
+ final List<BitextRule> bRules,
+ final boolean apiFormat, final XmlPrintMode xmlMode) throws IOException {
+ final long startTime = System.currentTimeMillis();
+ final int contextSize = DEFAULT_CONTEXT_SIZE;
+ final List<RuleMatch> ruleMatches =
+ checkBitext(src, trg, srcLt, trgLt, bRules);
+ // NOTE(review): the value returned by adjustRuleMatchPos is only assigned to
+ // the loop variable, so it never reaches ruleMatches. Unless
+ // adjustRuleMatchPos modifies its argument in place, this loop has no
+ // effect on the output below (the BitextReader variant collects the
+ // returned matches in a new list instead) - TODO confirm and fix.
+ for (RuleMatch thisMatch : ruleMatches) {
+ thisMatch =
+ trgLt.adjustRuleMatchPos(thisMatch,
+ 0, 1, 1, trg);
+ }
+ if (apiFormat) {
+ final String xml = StringTools.ruleMatchesToXML(ruleMatches, trg,
+ contextSize, xmlMode);
+ // wrap System.out so the XML is written UTF-8 encoded
+ PrintStream out = new PrintStream(System.out, true, "UTF-8");
+ out.print(xml);
+ } else {
+ printMatches(ruleMatches, 0, trg, contextSize);
+ }
+ //display stats if it's not in a buffered mode
+ if (xmlMode == StringTools.XmlPrintMode.NORMAL_XML) {
+ displayTimeStats(startTime, srcLt.getSentenceCount(), apiFormat);
+ }
+ return ruleMatches.size();
+ }
+
+ /**
+ * Checks the bilingual input (bitext) and displays the output (considering the target
+ * language) in API format or in the simple text format.
+ *
+ * NOTE: the positions returned by the rule matches are adjusted
+ * according to the data returned by the reader.
+ *
+ * @param reader Reader of bitext strings.
+ * @param srcLt Source JLanguageTool (used to analyze the text).
+ * @param trgLt Target JLanguageTool (used to analyze the text).
+ * @param bRules Bilingual rules used in addition to target standard rules.
+ * @param apiFormat Whether API format should be used.
+ * @param xmlMode The mode of XML output display.
+ * @return The number of rules matched on the bitext.
+ * @throws IOException
+ * @since 1.0.1
+ */
+ public static int checkBitext(final BitextReader reader,
+ final JLanguageTool srcLt, final JLanguageTool trgLt,
+ final List<BitextRule> bRules,
+ final boolean apiFormat) throws IOException {
+ final long startTime = System.currentTimeMillis();
+ final int contextSize = DEFAULT_CONTEXT_SIZE;
+ XmlPrintMode xmlMode = StringTools.XmlPrintMode.START_XML;
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ int matchCount = 0;
+ int sentCount = 0;
+ for (StringPair srcAndTrg : reader) {
+ final List<RuleMatch> curMatches = checkBitext(
+ srcAndTrg.getSource(), srcAndTrg.getTarget(),
+ srcLt, trgLt, bRules);
+ final List<RuleMatch> fixedMatches = new ArrayList<RuleMatch>();
+ for (RuleMatch thisMatch : curMatches) {
+ fixedMatches.add(
+ trgLt.adjustRuleMatchPos(thisMatch,
+ reader.getSentencePosition(),
+ reader.getColumnCount(),
+ reader.getLineCount(),
+ reader.getCurrentLine()));
+ }
+ ruleMatches.addAll(fixedMatches);
+ if (fixedMatches.size() > 0) {
+ if (apiFormat) {
+ final String xml = StringTools.ruleMatchesToXML(fixedMatches,
+ reader.getCurrentLine(),
+ contextSize, xmlMode);
+ if (xmlMode == StringTools.XmlPrintMode.START_XML) {
+ xmlMode = StringTools.XmlPrintMode.CONTINUE_XML;
+ }
+ PrintStream out = new PrintStream(System.out, true, "UTF-8");
+ out.print(xml);
+ } else {
+ printMatches(fixedMatches, matchCount, reader.getCurrentLine(), contextSize);
+ matchCount += fixedMatches.size();
+ }
+ }
+ sentCount++;
+ }
+ displayTimeStats(startTime, sentCount, apiFormat);
+ if (apiFormat) {
+ PrintStream out = new PrintStream(System.out, true, "UTF-8");
+ out.print("</matches>");
+ }
+ return ruleMatches.size();
+ }
+
+  /**
+   * Checks the bilingual input (bitext) and returns the matches found on the
+   * target text: the matches of all rules of the target JLanguageTool, followed
+   * by the matches of the given bitext rules. Nothing is printed.
+   *
+   * @param src Source text.
+   * @param trg Target text.
+   * @param srcLt Source JLanguageTool (used to analyze the text).
+   * @param trgLt Target JLanguageTool (used to analyze the text).
+   * @param bRules Bilingual rules used in addition to target standard rules.
+   * @return The list of rule matches on the bitext.
+   * @throws IOException
+   * @since 1.0.1
+   */
+  public static List<RuleMatch> checkBitext(final String src, final String trg,
+      final JLanguageTool srcLt, final JLanguageTool trgLt,
+      final List<BitextRule> bRules) throws IOException {
+    final AnalyzedSentence srcText = srcLt.getAnalyzedSentence(src);
+    final AnalyzedSentence trgText = trgLt.getAnalyzedSentence(trg);
+    // monolingual rules of the target language first ...
+    final List<RuleMatch> ruleMatches = trgLt.checkAnalyzedSentence(
+        JLanguageTool.paragraphHandling.NORMAL,
+        trgLt.getAllRules(), 0, 0, 1, trg, trgText);
+    // ... then the bilingual rules, which may return null for "no match"
+    for (final BitextRule bitextRule : bRules) {
+      final RuleMatch[] found = bitextRule.match(srcText, trgText);
+      if (found == null) {
+        continue;
+      }
+      for (final RuleMatch match : found) {
+        ruleMatches.add(match);
+      }
+    }
+    return ruleMatches;
+  }
+
+
+  /**
+   * Gets default bitext rules for a given pair of languages: the bitext
+   * pattern rules of the target language (if its bitext.xml exists), the false
+   * friend rules in bitext mode, and the built-in Java bitext rules.
+   *
+   * @param source Source language.
+   * @param target Target language.
+   * @return List of Bitext rules
+   * @throws IOException
+   * @throws ParserConfigurationException
+   * @throws SAXException
+   */
+  public static List<BitextRule> getBitextRules(final Language source,
+      final Language target) throws IOException, ParserConfigurationException, SAXException {
+    final List<BitextRule> bRules = new ArrayList<BitextRule>();
+    //try to load the bitext pattern rules for the language...
+    final BitextPatternRuleLoader ruleLoader = new BitextPatternRuleLoader();
+    final String name = "/" + target.getShortName() + "/bitext.xml";
+    final InputStream is = JLanguageTool.getDataBroker().getFromRulesDirAsStream(name);
+    if (is != null) {
+      try {
+        bRules.addAll(ruleLoader.getRules(is, name));
+      } finally {
+        // release the stream even if loading the rules fails
+        is.close();
+      }
+    }
+
+    //load the false friend rules in the bitext mode
+    final FalseFriendsAsBitextLoader fRuleLoader = new FalseFriendsAsBitextLoader();
+    final String fName = "/false-friends.xml";
+    bRules.addAll(fRuleLoader.
+        getFalseFriendsAsBitext(
+            JLanguageTool.getDataBroker().getRulesDir() + fName,
+            source, target));
+
+    //load Java bitext rules
+    // TODO: get ResourceBundle for possible parameters for rules
+    bRules.addAll(getAllBuiltinBitextRules(source, null));
+    return bRules;
+  }
+
+  /**
+   * Instantiates all built-in (non-pattern) bitext rules found via reflection
+   * in the "bitext" sub-package of the package of {@link Rule}.
+   * A rule class is instantiated with the first of its public constructors
+   * that has one of the supported signatures: no arguments,
+   * (ResourceBundle) or (ResourceBundle, Language).
+   *
+   * @param language language handed to (ResourceBundle, Language) constructors
+   * @param messages messages handed to constructors taking a ResourceBundle
+   * @return the instantiated rules
+   * @throws RuntimeException if a rule class has public constructors but none
+   *           with a supported signature, or if instantiation fails
+   */
+  private static List<BitextRule> getAllBuiltinBitextRules(final Language language,
+      final ResourceBundle messages) {
+    // use reflection to get a list of all non-pattern rules under
+    // "de.danielnaber.languagetool.rules.bitext"
+    // generic rules first, then language-specific ones
+    // TODO: the order of loading classes is not guaranteed so we may want to
+    // implement rule
+    // precedence
+
+    final List<BitextRule> rules = new ArrayList<BitextRule>();
+    try {
+      // we pass ".*Rule$" regexp to improve efficiency, see javadoc
+      final Class<?>[] classes = ReflectionUtils.findClasses(Rule.class
+          .getClassLoader(), Rule.class.getPackage().getName()
+          + ".bitext", ".*Rule$", 0,
+          Rule.class, null);
+
+      for (final Class<?> ruleClass : classes) {
+        final Constructor<?>[] constructors = ruleClass.getConstructors();
+        BitextRule rule = null;
+        // getConstructors() returns the constructors in no particular order,
+        // so all of them have to be examined before giving up
+        for (final Constructor<?> constructor : constructors) {
+          final Class<?>[] paramTypes = constructor.getParameterTypes();
+          if (paramTypes.length == 0) {
+            rule = (BitextRule) constructor.newInstance();
+          } else if (paramTypes.length == 1
+              && paramTypes[0].equals(ResourceBundle.class)) {
+            rule = (BitextRule) constructor.newInstance(messages);
+          } else if (paramTypes.length == 2
+              && paramTypes[0].equals(ResourceBundle.class)
+              && paramTypes[1].equals(Language.class)) {
+            rule = (BitextRule) constructor.newInstance(messages, language);
+          }
+          if (rule != null) {
+            break;
+          }
+        }
+        if (rule != null) {
+          rules.add(rule);
+        } else if (constructors.length > 0) {
+          // classes without any public constructor are skipped silently
+          throw new RuntimeException("Unknown constructor for rule class: "
+              + ruleClass.getName());
+        }
+      }
+    } catch (final Exception e) {
+      throw new RuntimeException("Failed to load rules: " + e.getMessage(), e);
+    }
+    // System.err.println("Loaded " + rules.size() + " rules");
+    return rules;
+  }
+
+
+  /**
+   * Simple rule profiler - used to run LT on a corpus to see which
+   * rule takes most time. Every rule is run ten times on all sentences of
+   * the text; for each rule one tab-separated line with the rule id, the
+   * median time of the runs, the sentence count, the match count of a single
+   * run and the sentences per second is printed to System.out.
+   *
+   * @param contents - text to check
+   * @param lt - instance of LanguageTool
+   * @throws IOException
+   */
+  public static void profileRulesOnText(final String contents,
+      final JLanguageTool lt) throws IOException {
+    final long[] workTime = new long[10];
+    final List<Rule> rules = lt.getAllRules();
+    final int ruleCount = rules.size();
+    System.out.printf("Testing %d rules\n", ruleCount);
+    System.out.println("Rule ID\tTime\tSentences\tMatches\tSentences per sec.");
+    final List<String> sentences = lt.sentenceTokenize(contents);
+    for (final Rule rule : rules) {
+      int matchCount = 0;
+      for (int k = 0; k < workTime.length; k++) {
+        int matchesInThisRun = 0;
+        final long startTime = System.currentTimeMillis();
+        for (final String sentence : sentences) {
+          matchesInThisRun += rule.match
+              (lt.getAnalyzedSentence(sentence)).length;
+        }
+        final long endTime = System.currentTimeMillis();
+        workTime[k] = endTime - startTime;
+        // every run checks the same text, so report the matches of one run
+        // instead of the sum over all repetitions
+        matchCount = matchesInThisRun;
+      }
+      // median() expects its input in ascending order
+      Arrays.sort(workTime);
+      final long time = median(workTime);
+      final float timeInSeconds = time / 1000.0f;
+      final float sentencesPerSecond = sentences.size() / timeInSeconds;
+      System.out.printf(Locale.ENGLISH,
+          "%s\t%d\t%d\t%d\t%.1f", rule.getId(),
+          time, sentences.size(), matchCount, sentencesPerSecond);
+      System.out.println();
+    }
+  }
+
+  /**
+   * Runs a single rule on every sentence of the given text and counts the
+   * matches.
+   *
+   * @param contents text to check
+   * @param lt instance of LanguageTool, used to split and analyze the text
+   * @param rule the rule to run
+   * @return total number of matches of the rule over all sentences
+   * @throws IOException
+   */
+  public static int profileRulesOnLine(final String contents,
+      final JLanguageTool lt, final Rule rule) throws IOException {
+    final List<String> sentences = lt.sentenceTokenize(contents);
+    int total = 0;
+    for (int i = 0; i < sentences.size(); i++) {
+      total += rule.match(lt.getAnalyzedSentence(sentences.get(i))).length;
+    }
+    return total;
+  }
+
+  /**
+   * Returns the median of the given values. The array is not sorted here, so
+   * the caller has to pass it in sorted order. For an even number of elements
+   * the integer mean of the two middle elements is returned.
+   *
+   * @param m the values, sorted, with at least one element
+   * @return the median of the values
+   */
+  public static long median(long[] m) {
+    final int mid = m.length / 2;
+    final boolean oddLength = m.length % 2 == 1;
+    return oddLength ? m[mid] : (m[mid - 1] + m[mid]) / 2;
+  }
+
+  /**
+   * Automatically applies suggestions to the text.
+   * Note: if there is more than one suggestion, always the first
+   * one is applied, and others ignored silently.
+   *
+   * @param contents String to be corrected
+   * @param lt Initialized LanguageTool object
+   * @return Corrected text as String; the unchanged input if nothing matched.
+   * @throws IOException
+   */
+  public static String correctText(final String contents, final JLanguageTool lt) throws IOException {
+    final List<RuleMatch> ruleMatches = lt.check(contents);
+    return ruleMatches.isEmpty()
+        ? contents
+        : correctTextFromMatches(contents, ruleMatches);
+  }
+
+  /**
+   * Automatically applies suggestions to the bilingual text and prints the
+   * (corrected) target sentence of every pair to System.out.
+   * Note: if there is more than one suggestion, always the first
+   * one is applied, and others ignored silently.
+   *
+   * @param reader a bitext file reader
+   * @param srcLt Initialized source JLanguageTool object
+   * @param trgLt Initialized target JLanguageTool object
+   * @param bRules List of all BitextRules to use
+   * @throws IOException
+   */
+  public static void correctBitext(final BitextReader reader,
+      final JLanguageTool srcLt, final JLanguageTool trgLt,
+      final List<BitextRule> bRules) throws IOException {
+    //TODO: implement a bitext writer for XML formats (like XLIFF)
+    for (final StringPair srcAndTrg : reader) {
+      final List<RuleMatch> curMatches = checkBitext(
+          srcAndTrg.getSource(), srcAndTrg.getTarget(),
+          srcLt, trgLt, bRules);
+      // matches are only needed for the current pair, so nothing is
+      // accumulated across the whole input
+      final List<RuleMatch> fixedMatches = new ArrayList<RuleMatch>(curMatches.size());
+      for (final RuleMatch thisMatch : curMatches) {
+        fixedMatches.add(
+            trgLt.adjustRuleMatchPos(thisMatch,
+                0, //don't need to adjust at all, we have zero offset related to trg sentence
+                reader.getTargetColumnCount(),
+                reader.getLineCount(),
+                reader.getCurrentLine()));
+      }
+      if (fixedMatches.isEmpty()) {
+        System.out.println(srcAndTrg.getTarget());
+      } else {
+        System.out.println(correctTextFromMatches(srcAndTrg.getTarget(),
+            fixedMatches));
+      }
+    }
+  }
+
+  /**
+   * Applies the first suggested replacement of every match to the text.
+   * Matches without suggestions are ignored. A match is only applied if the
+   * text at its (shifted) position still equals the originally flagged text,
+   * i.e. if it has not been changed by an earlier correction.
+   *
+   * @param contents the text the matches refer to
+   * @param matches the matches; their positions refer to <code>contents</code>
+   * @return the text with the replacements applied
+   */
+  private static String correctTextFromMatches(
+      final String contents, final List<RuleMatch> matches) {
+    final StringBuilder sb = new StringBuilder(contents);
+    // number of characters by which the text has shrunk so far
+    int offset = 0;
+    for (final RuleMatch rm : matches) {
+      final List<String> replacements = rm.getSuggestedReplacements();
+      if (replacements.isEmpty()) {
+        continue;
+      }
+      // the flagged text as it was in the uncorrected input
+      final String error = contents.substring(rm.getFromPos(), rm.getToPos());
+      final int from = rm.getFromPos() - offset;
+      final int to = rm.getToPos() - offset;
+      if (from < 0 || to > sb.length()) {
+        // the match overlaps a region removed by an earlier correction;
+        // skip it instead of failing with a StringIndexOutOfBoundsException
+        continue;
+      }
+      //make sure the error hasn't been already corrected:
+      if (error.equals(sb.substring(from, to))) {
+        final String replacement = replacements.get(0);
+        sb.replace(from, to, replacement);
+        offset += (rm.getToPos() - rm.getFromPos()) - replacement.length();
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Opens an input stream for the given resource path. The path is first
+   * interpreted as a URL; if it is not a well-formed URL it is treated as a
+   * file path.
+   *
+   * @param resourcePath a URL or a file path
+   * @return the opened stream
+   * @throws IOException if the URL stream cannot be opened, or if the path
+   *           is not a URL and does not denote an existing, readable file
+   */
+  public static InputStream getInputStream(final String resourcePath) throws IOException {
+    try {
+      // a well-formed URL is loaded directly
+      return new URL(resourcePath).openStream();
+    } catch (final MalformedURLException ignored) {
+      // not a URL: fall through and treat the argument as a file path
+    }
+    final File file = new File(resourcePath);
+    final boolean readableFile = file.exists() && file.isFile() && file.canRead();
+    if (!readableFile) {
+      throw new IOException(
+          "Could not open input stream from URL/resource/file: "
+          + file.getAbsolutePath());
+    }
+    return new BufferedInputStream(new FileInputStream(file));
+  }
+
+  /**
+   * Renders the stack trace of a throwable, as written by
+   * {@link Throwable#printStackTrace(PrintWriter)}, into a string.
+   *
+   * @param e the throwable whose stack trace is wanted
+   * @return the stack trace as text
+   */
+  public static String getFullStackTrace(final Throwable e) {
+    final StringWriter buffer = new StringWriter();
+    e.printStackTrace(new PrintWriter(buffer));
+    return buffer.toString();
+  }
+
+  /**
+   * Load a file from the classpath using getResourceAsStream().
+   *
+   * @param filename
+   *          name of the resource, resolved relative to this class
+   * @return the stream of the file
+   * @throws IOException
+   *           if the file cannot be loaded
+   */
+  public static InputStream getStream(final String filename) throws IOException {
+    // the other ways to load the stream like
+    // "Tools.class.getClass().getResourceAsStream(filename)"
+    // don't work in a web context (using Grails):
+    final InputStream stream = Tools.class.getResourceAsStream(filename);
+    if (stream != null) {
+      return stream;
+    }
+    throw new IOException("Could not load file from classpath : " + filename);
+  }
+
+}
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java
new file mode 100644
index 0000000..d7c2bfc
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/tools/UnsyncStack.java
@@ -0,0 +1,127 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2009 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.tools;
+
+import java.util.ArrayList;
+import java.util.EmptyStackException;
+
+/**
+ * An unsynchronized stack built on top of ArrayList (unlike java.util.Stack).
+ * The top of the stack is the last element of the list. Usage is the same as
+ * for java.util.Stack.
+ *
+ * @author Marcin Miłkowski.
+ *
+ */
+
+public class UnsyncStack<E> extends ArrayList<E> {
+  /**
+   * Generated automatically.
+   */
+  private static final long serialVersionUID = -4984830372178073605L;
+
+  /** Creates an empty stack. */
+  public UnsyncStack() {
+  }
+
+  /**
+   * Puts an item on top of this stack; equivalent to <code>add(item)</code>.
+   *
+   * @param item
+   *          the item to be pushed onto this stack.
+   * @return the <code>item</code> argument.
+   * @see java.util.ArrayList#add
+   */
+  public E push(E item) {
+    add(item);
+    return item;
+  }
+
+  /**
+   * Removes the item on top of this stack (the last item of the list) and
+   * returns it.
+   *
+   * @return The object that was at the top of this stack.
+   * @exception EmptyStackException
+   *              if this stack is empty.
+   */
+  public E pop() {
+    // peek() performs the emptiness check
+    final E top = peek();
+    remove(size() - 1);
+    return top;
+  }
+
+  /**
+   * Returns the item on top of this stack (the last item of the list)
+   * without removing it.
+   *
+   * @return the object at the top of this stack.
+   * @exception EmptyStackException
+   *              if this stack is empty.
+   */
+  public E peek() {
+    final int count = size();
+    if (count == 0) {
+      throw new EmptyStackException();
+    }
+    return get(count - 1);
+  }
+
+  /**
+   * Tests if this stack is empty.
+   *
+   * @return <code>true</code> if and only if this stack contains no items;
+   *         <code>false</code> otherwise.
+   */
+  public boolean empty() {
+    return isEmpty();
+  }
+
+  /**
+   * Returns the 1-based distance from the top of the stack of the occurrence
+   * of the object that is nearest to the top; the topmost item is at distance
+   * <code>1</code>. Items are compared with <code>equals</code>.
+   *
+   * @param o
+   *          the desired object.
+   * @return the 1-based position from the top of the stack where the object is
+   *         located; the return value <code>-1</code> indicates that the object
+   *         is not on the stack.
+   */
+  public int search(Object o) {
+    final int lastIndex = lastIndexOf(o);
+    return lastIndex < 0 ? -1 : size() - lastIndex;
+  }
+
+}