added more files, to complete languagetool uploadHEAD master

author: Arno Teigseth <arno@teigseth.no> 2011-02-05 08:48:27 +0000
committer: Arno Teigseth <arno@teigseth.no> 2011-02-05 08:48:27 +0000
commit: 4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce (patch)
tree: 7af736540eca93034428a975bd850e709fbbe2e5 /JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java
parent: ecaee85ab5984ebadd56721c295dc26b3335f7ce (diff)
download: grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.gz
grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.bz2
grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.xz
1 files changed, 197 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java
new file mode 100644
index 0000000..6c50282
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java
@@ -0,0 +1,197 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package de.danielnaber.languagetool;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.danielnaber.languagetool.tagging.de.AnalyzedGermanTokenReadings;
+
+/**
+ * A sentence that has been tokenized and analyzed.
+ * 
+ * @author Daniel Naber
+ */
+public class AnalyzedSentence {
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + Arrays.hashCode(nonBlankTokens);
+    result = prime * result + Arrays.hashCode(tokens);
+    result = prime * result + Arrays.hashCode(whPositions);
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    final AnalyzedSentence other = (AnalyzedSentence) obj;
+    if (!Arrays.equals(nonBlankTokens, other.nonBlankTokens))
+      return false;
+    if (!Arrays.equals(tokens, other.tokens))
+      return false;
+    if (!Arrays.equals(whPositions, other.whPositions))
+      return false;
+    return true;
+  }
+
+  private AnalyzedTokenReadings[] tokens;
+
+  private AnalyzedTokenReadings[] nonBlankTokens;
+
+  /**
+   * Array mapping positions of tokens as returned with
+   * getTokensWithoutWhitespace() to the internal tokens array.
+   */
+  private int[] whPositions;
+
+  /**
+   * Sets {@link AnalyzedTokenReadings}. Whitespace is also a token.
+   */
+  public AnalyzedSentence(final AnalyzedTokenReadings[] tokens) {    
+    this.tokens = tokens;
+  }
+  
+  public AnalyzedSentence(final AnalyzedTokenReadings[] tokens, final 
+      int[] whPositions) {
+    this.tokens = tokens;
+    this.setWhPositions(whPositions);
+    getTokensWithoutWhitespace();
+  } 
+
+  /**
+   * Returns the {@link AnalyzedTokenReadings} of the analyzed text. Whitespace
+   * is also a token.
+   */
+  public final AnalyzedTokenReadings[] getTokens() {
+    return tokens;
+  }
+
+  /**
+   * Returns the {@link AnalyzedTokenReadings} of the analyzed text, with
+   * whitespace tokens removed but with the artificial <code>SENT_START</code>
+   * token included.
+   */
+  public final AnalyzedTokenReadings[] getTokensWithoutWhitespace() {
+    if (nonBlankTokens == null) {
+      int whCounter = 0;
+      int nonWhCounter = 0;
+      final int[] mapping = new int[tokens.length + 1];
+      final List<AnalyzedTokenReadings> l = new ArrayList<AnalyzedTokenReadings>();
+      for (final AnalyzedTokenReadings token : tokens) {
+        if (!token.isWhitespace() || token.isSentStart() || token.isSentEnd()
+            || token.isParaEnd()) {
+          l.add(token);
+          mapping[nonWhCounter] = whCounter;
+          nonWhCounter++;
+        }
+        whCounter++;
+      }
+      setNonBlankTokens(l.toArray(new AnalyzedTokenReadings[l.size()]));
+      setWhPositions(mapping.clone());
+    }
+    return nonBlankTokens.clone();
+  }
+
+  /**
+   * Get a position of a non-whitespace token in the original sentence with
+   * whitespace.
+   * 
+   * @param nonWhPosition
+   *          Position of a non-whitespace token
+   * @return int position in the original sentence.
+   */
+  public final int getOriginalPosition(final int nonWhPosition) {
+    if (nonBlankTokens == null) {
+      getTokensWithoutWhitespace();
+    }
+    return getWhPositions()[nonWhPosition];
+  }
+
+  @Override
+  public final String toString() {
+    final StringBuilder sb = new StringBuilder();
+    for (final AnalyzedTokenReadings element : tokens) {
+      if (!element.isWhitespace()) {
+        sb.append(element.getToken());
+        sb.append('[');
+      }
+      for (int j = 0; j < element.getReadingsLength(); j++) {
+        final String posTag = element.getAnalyzedToken(j).getPOSTag();
+        if (element.isSentStart()) {
+          sb.append("<S>");
+        } else if (JLanguageTool.SENTENCE_END_TAGNAME.equals(element
+            .getAnalyzedToken(j).getPOSTag())) {
+          sb.append("</S>");
+        } else if (JLanguageTool.PARAGRAPH_END_TAGNAME.equals(element
+            .getAnalyzedToken(j).getPOSTag())) {
+          sb.append("<P/>");
+        } else if (element.getAnalyzedToken(j) != null && posTag == null
+            && !(element instanceof AnalyzedGermanTokenReadings)) {
+          // FIXME: don't depend on AnalyzedGermanTokenReadings here
+          sb.append(element.getAnalyzedToken(j).getToken());
+        } else {
+          if (!element.isWhitespace()) {
+            sb.append(element.getAnalyzedToken(j));
+            if (j < element.getReadingsLength() - 1) {
+              sb.append(',');
+            }
+          }
+        }
+      }
+      if (!element.isWhitespace()) {
+        sb.append(']');
+      } else {
+        sb.append(' ');
+      }
+
+    }
+    return sb.toString();
+  }
+
+  /**
+   * @param whPositions the whPositions to set
+   */
+  public void setWhPositions(int[] whPositions) {
+    this.whPositions = whPositions;
+  }
+
+  /**
+   * @return the whPositions
+   */
+  public int[] getWhPositions() {
+    return whPositions;
+  }
+
+  /**
+   * @param nonBlankTokens the nonBlankTokens to set
+   */
+  public void setNonBlankTokens(AnalyzedTokenReadings[] nonBlankTokens) {
+    this.nonBlankTokens = nonBlankTokens;
+  } 
+  
+}
author	Arno Teigseth <arno@teigseth.no>	2011-02-05 08:48:27 +0000
committer	Arno Teigseth <arno@teigseth.no>	2011-02-05 08:48:27 +0000
commit	4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce (patch)
tree	7af736540eca93034428a975bd850e709fbbe2e5 /JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedSentence.java
parent	ecaee85ab5984ebadd56721c295dc26b3335f7ce (diff)
download	grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.gz grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.bz2 grammar-norwegian-4f3d565a5e5ede6eb6fd1f276d4e8ad37b67b5ce.tar.xz