diff options
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java')
-rw-r--r-- | JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java | 284 |
1 files changed, 284 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java new file mode 100644 index 0000000..ac6dc54 --- /dev/null +++ b/JLanguageTool/src/java/de/danielnaber/languagetool/AnalyzedTokenReadings.java @@ -0,0 +1,284 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ + +package de.danielnaber.languagetool; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import de.danielnaber.languagetool.tools.StringTools; + +/** + * An array of {@link AnalyzedToken}s used to store multiple POS tags and lemmas + * for a given single token. + * + * @author Marcin Milkowski + */ +public class AnalyzedTokenReadings { + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(anTokReadings); + result = prime * result + (isLinebreak ? 1231 : 1237); + result = prime * result + (isParaEnd ? 1231 : 1237); + result = prime * result + (isSentEnd ? 1231 : 1237); + result = prime * result + (isSentStart ? 1231 : 1237); + result = prime * result + (isWhitespace ? 1231 : 1237); + result = prime * result + (isWhitespaceBefore ? 1231 : 1237); + result = prime * result + startPos; + result = prime * result + ((token == null) ? 0 : token.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + final AnalyzedTokenReadings other = (AnalyzedTokenReadings) obj; + if (!Arrays.equals(anTokReadings, other.anTokReadings)) + return false; + if (isLinebreak != other.isLinebreak) + return false; + if (isParaEnd != other.isParaEnd) + return false; + if (isSentEnd != other.isSentEnd) + return false; + if (isSentStart != other.isSentStart) + return false; + if (isWhitespace != other.isWhitespace) + return false; + if (isWhitespaceBefore != other.isWhitespaceBefore) + return false; + if (startPos != other.startPos) + return false; + if (token == null) { + if (other.token != null) + return false; + } else if (!token.equals(other.token)) + return false; + return true; + } + + protected AnalyzedToken[] anTokReadings; + private int startPos; + private String token; + + private boolean isWhitespace; + private boolean isLinebreak; + private boolean isSentEnd; + private boolean isSentStart; + private boolean isParaEnd; + + private boolean isWhitespaceBefore; + + public AnalyzedTokenReadings(final AnalyzedToken[] r, final int startPos) { + anTokReadings = r.clone(); + this.startPos = startPos; + init(); + } + + public AnalyzedTokenReadings(final List<AnalyzedToken> list, final int startPos) { + anTokReadings = list.toArray(new AnalyzedToken[list.size()]); + this.startPos = startPos; + init(); + } + + AnalyzedTokenReadings(final AnalyzedToken at) { + anTokReadings = new AnalyzedToken[1]; + anTokReadings[0] = at; + isWhitespaceBefore = at.isWhitespaceBefore(); + init(); + } + + public AnalyzedTokenReadings(final AnalyzedToken at, final int startPos) { + this(at); + this.startPos = startPos; + } + + private void init() { + token = anTokReadings[0].getToken(); + isWhitespace = StringTools.isWhitespace(token); + isLinebreak = "\n".equals(token) || "\r\n".equals(token) + || "\r".equals(token) || "\n\r".equals(token); + isSentStart = JLanguageTool.SENTENCE_START_TAGNAME.equals(anTokReadings[0] + .getPOSTag()); + isParaEnd = hasPosTag(JLanguageTool.PARAGRAPH_END_TAGNAME); + isSentEnd = hasPosTag(JLanguageTool.SENTENCE_END_TAGNAME); + } + + public final List<AnalyzedToken> getReadings() { + return Arrays.asList(anTokReadings); + } + + /** + * Checks if the token has a particular POS tag. + * + * @param pos + * POS Tag to check + * @return True if it does. + */ + public final boolean hasPosTag(final String pos) { + boolean found = false; + for (final AnalyzedToken reading : anTokReadings) { + if (reading.getPOSTag() != null) { + found = pos.equals(reading.getPOSTag()); + if (found) { + break; + } + } + } + return found; + } + + public final AnalyzedToken getAnalyzedToken(final int i) { + return anTokReadings[i]; + } + + public final void addReading(final AnalyzedToken tok) { + final ArrayList<AnalyzedToken> l = new ArrayList<AnalyzedToken>(); + for (int i = 0; i < anTokReadings.length - 1; i++) { + l.add(anTokReadings[i]); + } + if (anTokReadings[anTokReadings.length - 1].getPOSTag() != null) { + l.add(anTokReadings[anTokReadings.length - 1]); + } + tok.setWhitespaceBefore(isWhitespaceBefore); + l.add(tok); + anTokReadings = l.toArray(new AnalyzedToken[l.size()]); + if (tok.getToken().length() > token.length()) { //in case a longer token is added + token = tok.getToken(); + } + anTokReadings[anTokReadings.length - 1]. + setWhitespaceBefore(isWhitespaceBefore); + isParaEnd = hasPosTag(JLanguageTool.PARAGRAPH_END_TAGNAME); + isSentEnd = hasPosTag(JLanguageTool.SENTENCE_END_TAGNAME); + } + + public final void removeReading(final AnalyzedToken tok) { + final ArrayList<AnalyzedToken> l = new ArrayList<AnalyzedToken>(); + final AnalyzedToken tmpTok = new AnalyzedToken(tok.getToken(), tok + .getPOSTag(), tok.getLemma()); + tmpTok.setWhitespaceBefore(isWhitespaceBefore); + for (AnalyzedToken anTokReading : anTokReadings) { + if (!anTokReading.equals(tmpTok)) { + l.add(anTokReading); + } + } + anTokReadings = l.toArray(new AnalyzedToken[l.size()]); + } + + public final int getReadingsLength() { + return anTokReadings.length; + } + + public final boolean isWhitespace() { + return isWhitespace; + } + + /** + * Returns true if the token equals \n, \r\n \n\r or \r\n. + */ + public final boolean isLinebreak() { + return isLinebreak; + } + + public final boolean isSentStart() { + return isSentStart; + } + + /** + * @return true when the token is a last token in a paragraph. + */ + public final boolean isParaEnd() { + return isParaEnd; + } + + /** + * Add PARA_END tag. + */ + public void setParaEnd() { + final AnalyzedToken paragraphEnd = new AnalyzedToken(getToken(), + JLanguageTool.PARAGRAPH_END_TAGNAME, getAnalyzedToken(0).getLemma()); + addReading(paragraphEnd); + } + + /** + * @return true when the token is a last token in a sentence. + */ + public final boolean isSentEnd() { + return isSentEnd; + } + + /** + * @since 0.9.9 + * @return true if the token is OpenOffice field code. + */ + public final boolean isFieldCode() { + return "\u0001".equals(token) || "\u0002".equals(token); + } + + /** + * Add a SENT_END tag. + */ + public final void setSentEnd() { + final AnalyzedToken sentenceEnd = new AnalyzedToken(getToken(), + JLanguageTool.SENTENCE_END_TAGNAME, getAnalyzedToken(0).getLemma()); + addReading(sentenceEnd); + } + + public final int getStartPos() { + return startPos; + } + + public final void setStartPos(final int position) { + startPos = position; + } + + public final String getToken() { + return token; + } + + public final void setWhitespaceBefore(final boolean isWhite) { + isWhitespaceBefore = isWhite; + for (final AnalyzedToken aTok : anTokReadings) { + aTok.setWhitespaceBefore(isWhite); + } + } + + public final boolean isWhitespaceBefore() { + return isWhitespaceBefore; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + for (final AnalyzedToken element : anTokReadings) { + sb.append(element); + } + return sb.toString(); + } + +} |