/* LanguageTool, a natural language style checker * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 * USA */ package de.danielnaber.languagetool; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import de.danielnaber.languagetool.tagging.de.AnalyzedGermanTokenReadings; /** * A sentence that has been tokenized and analyzed. * * @author Daniel Naber */ public class AnalyzedSentence { @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + Arrays.hashCode(nonBlankTokens); result = prime * result + Arrays.hashCode(tokens); result = prime * result + Arrays.hashCode(whPositions); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; final AnalyzedSentence other = (AnalyzedSentence) obj; if (!Arrays.equals(nonBlankTokens, other.nonBlankTokens)) return false; if (!Arrays.equals(tokens, other.tokens)) return false; if (!Arrays.equals(whPositions, other.whPositions)) return false; return true; } private AnalyzedTokenReadings[] tokens; private AnalyzedTokenReadings[] nonBlankTokens; /** * Array mapping positions of tokens as returned with * getTokensWithoutWhitespace() to the internal tokens array. */ private int[] whPositions; /** * Sets {@link AnalyzedTokenReadings}. Whitespace is also a token. */ public AnalyzedSentence(final AnalyzedTokenReadings[] tokens) { this.tokens = tokens; } public AnalyzedSentence(final AnalyzedTokenReadings[] tokens, final int[] whPositions) { this.tokens = tokens; this.setWhPositions(whPositions); getTokensWithoutWhitespace(); } /** * Returns the {@link AnalyzedTokenReadings} of the analyzed text. Whitespace * is also a token. */ public final AnalyzedTokenReadings[] getTokens() { return tokens; } /** * Returns the {@link AnalyzedTokenReadings} of the analyzed text, with * whitespace tokens removed but with the artificial SENT_START * token included. */ public final AnalyzedTokenReadings[] getTokensWithoutWhitespace() { if (nonBlankTokens == null) { int whCounter = 0; int nonWhCounter = 0; final int[] mapping = new int[tokens.length + 1]; final List l = new ArrayList(); for (final AnalyzedTokenReadings token : tokens) { if (!token.isWhitespace() || token.isSentStart() || token.isSentEnd() || token.isParaEnd()) { l.add(token); mapping[nonWhCounter] = whCounter; nonWhCounter++; } whCounter++; } setNonBlankTokens(l.toArray(new AnalyzedTokenReadings[l.size()])); setWhPositions(mapping.clone()); } return nonBlankTokens.clone(); } /** * Get a position of a non-whitespace token in the original sentence with * whitespace. * * @param nonWhPosition * Position of a non-whitespace token * @return int position in the original sentence. */ public final int getOriginalPosition(final int nonWhPosition) { if (nonBlankTokens == null) { getTokensWithoutWhitespace(); } return getWhPositions()[nonWhPosition]; } @Override public final String toString() { final StringBuilder sb = new StringBuilder(); for (final AnalyzedTokenReadings element : tokens) { if (!element.isWhitespace()) { sb.append(element.getToken()); sb.append('['); } for (int j = 0; j < element.getReadingsLength(); j++) { final String posTag = element.getAnalyzedToken(j).getPOSTag(); if (element.isSentStart()) { sb.append("~~"); } else if (JLanguageTool.SENTENCE_END_TAGNAME.equals(element .getAnalyzedToken(j).getPOSTag())) { sb.append("~~"); } else if (JLanguageTool.PARAGRAPH_END_TAGNAME.equals(element .getAnalyzedToken(j).getPOSTag())) { sb.append("

"); } else if (element.getAnalyzedToken(j) != null && posTag == null && !(element instanceof AnalyzedGermanTokenReadings)) { // FIXME: don't depend on AnalyzedGermanTokenReadings here sb.append(element.getAnalyzedToken(j).getToken()); } else { if (!element.isWhitespace()) { sb.append(element.getAnalyzedToken(j)); if (j < element.getReadingsLength() - 1) { sb.append(','); } } } } if (!element.isWhitespace()) { sb.append(']'); } else { sb.append(' '); } } return sb.toString(); } /** * @param whPositions the whPositions to set */ public void setWhPositions(int[] whPositions) { this.whPositions = whPositions; } /** * @return the whPositions */ public int[] getWhPositions() { return whPositions; } /** * @param nonBlankTokens the nonBlankTokens to set */ public void setNonBlankTokens(AnalyzedTokenReadings[] nonBlankTokens) { this.nonBlankTokens = nonBlankTokens; } }