summary | refs | log | tree | commit | diff | stats
path: root/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java
diff options
context:
space:
mode:
Diffstat (limited to 'JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java')
-rw-r--r-- JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java 432
1 files changed, 432 insertions, 0 deletions
diff --git a/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java
new file mode 100644
index 0000000..7fbb35d
--- /dev/null
+++ b/JLanguageTool/src/java/de/danielnaber/languagetool/rules/patterns/Unifier.java
@@ -0,0 +1,432 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package de.danielnaber.languagetool.rules.patterns;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import de.danielnaber.languagetool.AnalyzedToken;
+import de.danielnaber.languagetool.AnalyzedTokenReadings;
+
+/**
+ * Implements unification of features over tokens.
+ *
+ * @author Marcin Milkowski
+ */
+public class Unifier {
+
+  //TODO: add a possibility to negate some features but not all
+  /**
+   * Negates the meaning of unification just like negation in Element tokens.
+   */
+  private boolean negation;
+
+  /** Set by {@link #startUnify()}; routes isSatisfied to checkNext. */
+  private boolean allFeatsIn;
+
+  /** Index of the latest isSatisfied call made before startUnify; -1 = none. */
+  private int tokCnt;
+
+  /** Index in tokSequence under which checkNext stores accepted readings. */
+  private int readingsCounter;
+
+  /** Accepted readings; index 0 is filled before startUnify, the rest after. */
+  private final List<AnalyzedTokenReadings> tokSequence;
+
+  /** Maps each (feature, type) pair to the Element defining that equivalence. */
+  private final Map<EquivalenceTypeLocator, Element> equivalenceTypes;
+
+  /**
+   * A Map that stores all possible equivalence types listed for features.
+   */
+  private final Map<String, List<String>> equivalenceFeatures;
+
+  /**
+   * For every call counted by tokCnt: feature name mapped to the matched types.
+   */
+  private final List<Map<String, Set<String>>> equivalencesMatched;
+
+  /**
+   * Marks found interpretations in subsequent tokens.
+   */
+  private List<Boolean> featuresFound;
+
+  /**
+   * For checking the current token.
+   */
+  private List<Boolean> tmpFeaturesFound;
+
+  /**
+   * Internal flag for checking whether the first token in tokSequence has to be
+   * yet unified.
+   */
+  private boolean firstUnified;
+
+  // State of the simple isUnified()/getFinalUnified() protocol.
+  private boolean inUnification;
+  private boolean uniMatched;
+  private boolean uniAllMatched;
+  private AnalyzedTokenReadings[] unifiedTokens;
+
+  /**
+   * Instantiates the unifier.
+   */
+  public Unifier() {
+    tokCnt = -1;
+    readingsCounter = 1;
+    equivalencesMatched = new ArrayList<Map<String, Set<String>>>();
+    equivalenceTypes = new HashMap<EquivalenceTypeLocator, Element>();
+    equivalenceFeatures = new HashMap<String, List<String>>();
+    featuresFound = new ArrayList<Boolean>();
+    tmpFeaturesFound = new ArrayList<Boolean>();
+    tokSequence = new ArrayList<AnalyzedTokenReadings>();
+  }
+
+  /**
+   * Prepares equivalence types for features to be tested. All equivalence types
+   * are given as {@link Element}s. They create an equivalence set (with
+   * abstraction). A feature/type pair that is already registered is left as is.
+   *
+   * @param feature
+   *          Feature to be tested, like gender, grammatical case or number.
+   * @param type
+   *          Type of equivalence for the feature, for example plural, first
+   *          person, genitive.
+   * @param elem
+   *          Element specifying the equivalence.
+   */
+  public final void setEquivalence(final String feature, final String type,
+      final Element elem) {
+    final EquivalenceTypeLocator key = new EquivalenceTypeLocator(feature, type);
+    if (equivalenceTypes.containsKey(key)) {
+      return;
+    }
+    equivalenceTypes.put(key, elem);
+    List<String> knownTypes = equivalenceFeatures.get(feature);
+    if (knownTypes == null) {
+      knownTypes = new ArrayList<String>();
+      equivalenceFeatures.put(feature, knownTypes);
+    }
+    knownTypes.add(type);
+  }
+
+  /**
+   * Tests if a token has shared features with other tokens.
+   *
+   * @param aToken
+   *          - token to be tested
+   * @param uFeatures
+   *          - features to be tested, each mapped to the equivalence types to
+   *          check; a null or empty list stands for all types of the feature
+   * @return true if the token shares the features with other tokens, inverted
+   *         when negation is set; false whenever uFeatures is null
+   */
+  protected final boolean isSatisfied(final AnalyzedToken aToken,
+      final Map<String, List<String>> uFeatures) {
+    if (allFeatsIn && equivalencesMatched.isEmpty()) {
+      return false;
+    }
+    // Error: no feature given!
+    if (uFeatures == null) {
+      return false; // throw exception??
+    }
+    if (allFeatsIn) {
+      return checkNext(aToken, uFeatures) ^ negation;
+    }
+    tokCnt++;
+    while (equivalencesMatched.size() <= tokCnt) {
+      equivalencesMatched.add(new HashMap<String, Set<String>>());
+    }
+    final Map<String, Set<String>> matched = equivalencesMatched.get(tokCnt);
+    boolean unified = true;
+    for (final Map.Entry<String, List<String>> feat : uFeatures.entrySet()) {
+      final String feature = feat.getKey();
+      List<String> types = feat.getValue();
+      if (types == null || types.isEmpty()) {
+        types = equivalenceFeatures.get(feature);
+      }
+      if (types == null) {
+        // Unknown feature: fail like an unknown type does, instead of an NPE.
+        return false;
+      }
+      for (final String typename : types) {
+        final Element testElem = equivalenceTypes
+            .get(new EquivalenceTypeLocator(feature, typename));
+        if (testElem == null) {
+          return false;
+        }
+        if (testElem.isMatched(aToken)) {
+          Set<String> typeSet = matched.get(feature);
+          if (typeSet == null) {
+            typeSet = new HashSet<String>();
+            matched.put(feature, typeSet);
+          }
+          typeSet.add(typename);
+        }
+      }
+      if (!matched.containsKey(feature)) {
+        unified = false;
+        break;
+      }
+    }
+    if (unified) {
+      if (tokCnt == 0 || tokSequence.isEmpty()) {
+        tokSequence.add(new AnalyzedTokenReadings(aToken, 0));
+      } else {
+        tokSequence.get(0).addReading(aToken);
+      }
+    }
+    return unified ^ negation;
+  }
+
+  /**
+   * Unification phase: tests aToken against the equivalences recorded before
+   * startUnify and stores it in tokSequence when at least one of them fits.
+   */
+  private boolean checkNext(final AnalyzedToken aToken,
+      final Map<String, List<String>> uFeatures) {
+    boolean anyFeatUnified = false;
+    final List<Boolean> found = new ArrayList<Boolean>(tmpFeaturesFound);
+    for (int i = 0; i <= tokCnt; i++) {
+      final Map<String, Set<String>> matched = equivalencesMatched.get(i);
+      boolean allFeatsUnified = true;
+      for (final Map.Entry<String, List<String>> feat : uFeatures.entrySet()) {
+        final Set<String> matchedTypes = matched.get(feat.getKey());
+        List<String> types = feat.getValue();
+        if (types == null || types.isEmpty()) {
+          types = equivalenceFeatures.get(feat.getKey());
+        }
+        boolean featUnified = false;
+        // types stays null for an unknown feature, which then cannot unify.
+        if (types != null && featuresFound.get(i) && matchedTypes != null) {
+          for (final String typename : types) {
+            if (!featUnified && matchedTypes.contains(typename)) {
+              featUnified = equivalenceTypes
+                  .get(new EquivalenceTypeLocator(feat.getKey(), typename))
+                  .isMatched(aToken);
+            }
+          }
+        }
+        allFeatsUnified &= featUnified;
+      }
+      found.set(i, allFeatsUnified);
+      anyFeatUnified = anyFeatUnified || allFeatsUnified;
+    }
+    if (anyFeatUnified) {
+      if (tokSequence.size() == readingsCounter) {
+        tokSequence.add(new AnalyzedTokenReadings(aToken, 0));
+      } else {
+        tokSequence.get(readingsCounter).addReading(aToken);
+      }
+      tmpFeaturesFound = found;
+    }
+    return anyFeatUnified;
+  }
+
+  /**
+   * Call after every complete token (AnalyzedTokenReadings) checked.
+   */
+  public final void startNextToken() {
+    featuresFound = new ArrayList<Boolean>(tmpFeaturesFound);
+    readingsCounter++;
+  }
+
+  /**
+   * Starts testing only those equivalences that were previously matched.
+   */
+  public final void startUnify() {
+    allFeatsIn = true;
+    for (int i = 0; i <= tokCnt; i++) {
+      featuresFound.add(true);
+    }
+    tmpFeaturesFound = new ArrayList<Boolean>(featuresFound);
+  }
+
+  /** Sets whether results of {@link #isSatisfied} are inverted. */
+  public final void setNegation(final boolean neg) {
+    negation = neg;
+  }
+
+  /** Returns the current negation flag. */
+  public final boolean getNegation() {
+    return negation;
+  }
+
+  /**
+   * Resets after use of unification. Required. Registered equivalences stay.
+   */
+  public final void reset() {
+    equivalencesMatched.clear();
+    allFeatsIn = false;
+    negation = false;
+    tokCnt = -1;
+    featuresFound.clear();
+    tmpFeaturesFound.clear();
+    tokSequence.clear();
+    readingsCounter = 1;
+    firstUnified = false;
+    uniMatched = false;
+    uniAllMatched = false;
+    inUnification = false;
+  }
+
+  /**
+   * Gets a full sequence of filtered tokens.
+   *
+   * @return Array of AnalyzedTokenReadings that match equivalence relation
+   *         defined for features tested, or null if there is none.
+   */
+  public final AnalyzedTokenReadings[] getUnifiedTokens() {
+    if (tokSequence.isEmpty()) {
+      return null;
+    }
+    if (!firstUnified) {
+      int first = 0;
+      tmpFeaturesFound.add(true); // sentinel ends the scan (Bentley's search idea)
+      while (!tmpFeaturesFound.get(first)) {
+        first++;
+      }
+      tmpFeaturesFound.remove(tmpFeaturesFound.size() - 1);
+      if (first >= tmpFeaturesFound.size()) {
+        return null;
+      }
+      // FIXME: why this happens??
+      final AnalyzedTokenReadings firstToken = tokSequence.get(0);
+      final int numRead = firstToken.getReadingsLength();
+      if (first < numRead) {
+        final AnalyzedTokenReadings tmpATR = new AnalyzedTokenReadings(
+            firstToken.getAnalyzedToken(first), 0);
+        for (int i = first + 1; i <= Math.min(numRead - 1, tokCnt); i++) {
+          if (tmpFeaturesFound.get(i)) {
+            tmpATR.addReading(firstToken.getAnalyzedToken(i));
+          }
+        }
+        tokSequence.set(0, tmpATR);
+      }
+      firstUnified = true;
+    }
+    return tokSequence.toArray(new AnalyzedTokenReadings[tokSequence.size()]);
+  }
+
+  /**
+   * Tests if the token sequence is unified.
+   *
+   * @param matchToken
+   *          AnalyzedToken token to unify
+   * @param uFeatures
+   *          features to unify over, mapped to the value types of each feature
+   * @param isUniNegated
+   *          if true, then return negated result
+   * @param lastReading
+   *          true when the matchToken is the last reading in the
+   *          AnalyzedReadings
+   * @return True if the tokens in the sequence are unified; always true until
+   *         a call with lastReading set has switched to the unification phase.
+   */
+  public final boolean isUnified(final AnalyzedToken matchToken,
+      final Map<String, List<String>> uFeatures, final boolean isUniNegated,
+      final boolean lastReading) {
+    if (inUnification) {
+      uniMatched |= isSatisfied(matchToken, uFeatures);
+      uniAllMatched = uniMatched;
+      if (lastReading) {
+        startNextToken();
+        unifiedTokens = getUnifiedTokens();
+        uniMatched = false;
+      }
+      return uniAllMatched;
+    }
+    if (isUniNegated) {
+      setNegation(true);
+    }
+    isSatisfied(matchToken, uFeatures);
+    if (lastReading) {
+      inUnification = true;
+      uniMatched = false;
+      startUnify();
+    }
+    return true;
+  }
+
+  /**
+   * Used for getting a unified sequence in case when simple test method
+   * {@link #isUnified} was used.
+   *
+   * @return An array of {@link AnalyzedTokenReadings}, or null while
+   *         {@link #isUnified} has not yet entered the unification phase
+   */
+  public final AnalyzedTokenReadings[] getFinalUnified() {
+    return inUnification ? unifiedTokens : null;
+  }
+}
+
+/**
+ * Key for the equivalence table of {@link Unifier}: a feature name paired with
+ * one of its type names. Two locators are equal when both strings are equal
+ * (null matches only null), and equal locators share a hash code.
+ */
+class EquivalenceTypeLocator {
+
+  /** Name of the feature. */
+  private final String feature;
+  /** Name of the equivalence type within the feature. */
+  private final String type;
+
+  EquivalenceTypeLocator(final String feature, final String type) {
+    this.feature = feature;
+    this.type = type;
+  }
+
+  /** Combines the hash codes of both fields, counting null as 0. */
+  @Override
+  public int hashCode() {
+    final int featureHash = feature == null ? 0 : feature.hashCode();
+    final int typeHash = type == null ? 0 : type.hashCode();
+    return 31 * (31 + featureHash) + typeHash;
+  }
+
+  /**
+   * Compares field by field; only an object of exactly this class can be equal.
+   */
+  @Override
+  public boolean equals(final Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    final EquivalenceTypeLocator that = (EquivalenceTypeLocator) obj;
+    return sameText(feature, that.feature) && sameText(type, that.type);
+  }
+
+  /** Null-safe string equality: null equals only null. */
+  private static boolean sameText(final String left, final String right) {
+    if (left == null) {
+      return right == null;
+    }
+    return left.equals(right);
+  }
+
+}
\ No newline at end of file