/* LanguageTool, a natural language style checker
* Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package de.danielnaber.languagetool.rules.patterns;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import de.danielnaber.languagetool.AnalyzedSentence;
import de.danielnaber.languagetool.AnalyzedTokenReadings;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.IncorrectExample;
import de.danielnaber.languagetool.rules.RuleMatch;
import de.danielnaber.languagetool.tools.StringTools;
/**
* A Rule that describes a language error as a simple pattern of words or of
* part-of-speech tags.
*
* @author Daniel Naber
*/
public class PatternRule extends AbstractPatternRule {
/** Markup that opens a suggestion inside a rule message. */
private static final String SUGG_TAG = "<suggestion>";
/** Markup that closes a suggestion inside a rule message. */
private static final String END_SUGG_TAG = "</suggestion>";
/** Sub-identifier, because there can be more than one rule in a rule group. */
private String subId;
/** Message template; back references in it are expanded for every match. */
private String message;
/** Short message handed unchanged to every created RuleMatch. */
private String shortMessage;
/** Formatted suggestion elements. **/
private List<Match> suggestionMatches;
/**
 * A list of elements as they appear in XML file (phrases count as single
 * tokens in case of matches or skipping). Each entry is the number of
 * pattern elements the corresponding XML element expands to.
 */
private List<Integer> elementNo;
/**
 * This property is used for short-circuiting evaluation of the elementNo list
 * order. It is only set when at least one element is part of a phrase.
 */
private boolean useList;
/**
 * Marks whether the rule is a member of a disjunctive set (in case of OR
 * operation on phraserefs).
 **/
private boolean isMemberOfDisjunctiveSet;
/**
 * Creates a pattern rule and pre-computes {@code elementNo}: one entry per
 * element as written in the XML file, holding the number of pattern elements
 * it expands to (a run of elements belonging to the same phrase counts as one
 * entry, every other element as an entry of 1).
 *
 * @param id
 *          Id of the Rule
 * @param language
 *          Language of the Rule
 * @param elements
 *          Element (token) list
 * @param description
 *          Description to be shown (name)
 * @param message
 *          Message to be displayed to the user
 * @param shortMessage
 *          Short message, stored as given and passed on to created matches
 * @throws NullPointerException
 *           if id, language, elements or description is null
 */
public PatternRule(final String id, final Language language,
    final List elements, final String description,
    final String message, final String shortMessage) {
  super(id, description, language, elements, false);
  if (id == null) {
    throw new NullPointerException("id cannot be null");
  }
  if (language == null) {
    throw new NullPointerException("language cannot be null");
  }
  if (elements == null) {
    throw new NullPointerException("elements cannot be null");
  }
  if (description == null) {
    throw new NullPointerException("description cannot be null");
  }
  this.message = message;
  this.shortMessage = shortMessage;
  this.elementNo = new ArrayList<Integer>();
  // Name of the phrase whose elements are currently being counted.
  String openPhrase = "";
  // Number of elements seen so far for the phrase that is still open.
  int cnt = 0;
  for (final Element e : patternElements) {
    if (e.isPartOfPhrase()) {
      final String curName = e.getPhraseName();
      if (cnt > 0 && !openPhrase.equals(curName)) {
        // A different phrase follows directly: close the previous one. The
        // current element is then counted as the first of the new phrase
        // (previously it was dropped from the count).
        elementNo.add(cnt);
        cnt = 0;
      }
      cnt++;
      useList = true;
      openPhrase = curName;
    } else {
      if (cnt > 0) {
        elementNo.add(cnt);
        // Reset, otherwise every later plain token would add the stale
        // phrase length again.
        cnt = 0;
      }
      openPhrase = "";
      elementNo.add(1);
    }
  }
  if (cnt > 0) {
    // The pattern ended inside a phrase.
    elementNo.add(cnt);
  }
}
/**
 * Creates a pattern rule through the six-argument constructor and then
 * records whether it is a member of a disjunctive set.
 *
 * @param isMember
 *          value stored as the disjunctive-set membership flag
 */
public PatternRule(final String id, final Language language,
    final List elements, final String description,
    final String message, final String shortMessage,
    final boolean isMember) {
  this(id, language, elements, description, message, shortMessage);
  isMemberOfDisjunctiveSet = isMember;
}
/**
 * @return the sub-identifier of this rule as last set with
 *         {@link #setSubId(String)}, or null if it was never set
 */
public final String getSubId() {
  return this.subId;
}
/**
 * Stores the sub-identifier of this rule (there can be more than one rule in
 * a rule group).
 *
 * @param subId
 *          the new sub-identifier
 */
public final void setSubId(final String subId) {
  this.subId = subId;
}
/**
 * @return the message template of this rule, exactly as stored (back
 *         references are not expanded here)
 */
public final String getMessage() {
  return this.message;
}
/**
 * Used for testing rules: only one of the set can match.
 *
 * @return the disjunctive-set membership flag, i.e. whether the rule can
 *         non-match (as a member of disjunctive set of rules generated by
 *         phraseref in includephrases element).
 */
public final boolean isWithComplexPhrase() {
  return this.isMemberOfDisjunctiveSet;
}
/**
 * Clears the disjunctive-set membership flag, so that
 * {@link #isWithComplexPhrase()} returns false afterwards. Used for testing.
 **/
public final void notComplexPhrase() {
  this.isMemberOfDisjunctiveSet = false;
}
/**
 * Return the pattern as a string: the {@code toString()} of every pattern
 * element, joined with {@code ", "}.
 *
 * @return the joined element descriptions
 * @since 0.9.2
 */
public final String toPatternString() {
  final List<String> strList = new ArrayList<String>(patternElements.size());
  for (Element patternElement : patternElements) {
    strList.add(patternElement.toString());
  }
  return StringTools.listToString(strList, ", ");
}
/**
 * Return the pattern as an XML string. FIXME: this is not complete, information might be lost!
 *
 * The method appends, in this order: two leading literals, one entry per
 * pattern element (the XML-escaped element string when it is non-null), the
 * XML-escaped message, every incorrect example and every correct example
 * (both XML-escaped, both skipped when the respective getter returns null).
 *
 * NOTE(review): every markup literal in this method is empty or consists of a
 * newline only, and the first statement inside the element loop is not a
 * well-formed literal. Presumably the XML tags were lost from the literals;
 * restore them from a pristine copy of this file before relying on the
 * output - TODO confirm.
 *
 * @return the text accumulated in the builder
 * @since 0.9.3
 */
public final String toXML() {
final StringBuilder sb = new StringBuilder();
// NOTE(review): presumably the opening markup of the rule and of the pattern.
sb.append("\n");
sb.append("\n");
for (Element patternElement : patternElements) {
// NOTE(review): the next statement is damaged (unterminated literal);
// it presumably wrote the start of the token markup - TODO confirm.
sb.append("');
if (patternElement.getString() != null) {
sb.append(StringTools.escapeXML(patternElement.getString()));
} else {
// TODO: elements without a plain string are written with empty content
}
sb.append("\n");
}
sb.append("\n");
sb.append("");
sb.append(StringTools.escapeXML(message));
sb.append("\n");
// Examples are optional: a null list is skipped entirely.
if (getIncorrectExamples() != null) {
for (IncorrectExample example : getIncorrectExamples()) {
sb.append("");
sb.append(StringTools.escapeXML(example.getExample()));
sb.append("\n");
}
}
if (getCorrectExamples() != null) {
for (String example : getCorrectExamples()) {
sb.append("");
sb.append(StringTools.escapeXML(example));
sb.append("\n");
}
}
sb.append("");
return sb.toString();
}
/**
 * Replaces the message template of this rule.
 *
 * @param message
 *          the new message template
 */
public final void setMessage(final String message) {
  this.message = message;
}
/**
 * Tries the pattern at every start position of the sentence (only at
 * position 0 when {@code sentStart} is set) and collects one rule match per
 * position at which all pattern elements matched.
 *
 * @param text
 *          the analyzed sentence; its tokens without whitespace are used
 * @return the matches found, an empty array if there are none
 * @throws IOException
 *           propagated from building the match message
 */
@Override
public final RuleMatch[] match(final AnalyzedSentence text)
    throws IOException {
  final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
  final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
  // tokenPositions[n] is the distance (in tokens, skipped ones included)
  // from the previously matched token to the token matched by element n.
  final int[] tokenPositions = new int[tokens.length + 1];
  final int patternSize = patternElements.size();
  final int limit = Math.max(0, tokens.length - patternSize + 1);
  // Declared outside the loop on purpose: the value is carried over into the
  // next start position as "previous element", exactly as before.
  Element elem = null;
  for (int i = 0; i < limit && !(sentStart && i > 0); i++) {
    boolean allElementsMatch = false;
    int firstMatchToken = -1;
    int lastMatchToken = -1;
    int matchingTokens = 0;
    int prevSkipNext = 0;
    // this variable keeps the total number
    // of tokens skipped
    int skipShiftTotal = 0;
    if (testUnification) {
      unifier.reset();
    }
    for (int k = 0; k < patternSize; k++) {
      final Element prevElement = elem;
      elem = patternElements.get(k);
      setupRef(firstMatchToken, elem, tokens);
      final int nextPos = i + k + skipShiftTotal;
      prevMatched = false;
      // A negative skip, or one reaching past the last token, means
      // "anywhere up to the end of the sentence".
      if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { // SENT_END?
        prevSkipNext = tokens.length - (nextPos + 1);
      }
      // Never search so far that the remaining elements could not fit.
      final int maxTok = Math.min(nextPos + prevSkipNext,
          tokens.length - (patternSize - k));
      for (int m = nextPos; m <= maxTok; m++) {
        allElementsMatch = testAllReadings(tokens, elem, prevElement, m,
            firstMatchToken, prevSkipNext);
        if (allElementsMatch) {
          lastMatchToken = m;
          final int skipShift = lastMatchToken - nextPos;
          tokenPositions[matchingTokens] = skipShift + 1;
          prevSkipNext = translateElementNo(elem.getSkipNext());
          matchingTokens++;
          skipShiftTotal += skipShift;
          if (firstMatchToken == -1) {
            firstMatchToken = lastMatchToken;
          }
          break;
        }
      }
      if (!allElementsMatch) {
        break;
      }
    }
    if (allElementsMatch && matchingTokens == patternSize) {
      final RuleMatch rM = createRuleMatch(tokenPositions, tokens,
          firstMatchToken, lastMatchToken, matchingTokens);
      if (rM != null) {
        ruleMatches.add(rM);
      }
    }
  }
  return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]);
}
/**
 * Builds the rule match for one successful pattern match: expands the
 * message, applies the start and end position corrections and computes the
 * character span to report.
 *
 * @param tokenPositions
 *          relative positions of the matched tokens
 * @param tokens
 *          the sentence tokens without whitespace
 * @param firstMatchToken
 *          index of the first matched token
 * @param lastMatchToken
 *          index of the last matched token
 * @param matchingTokens
 *          number of pattern elements that matched
 * @return the rule match, or null when the computed span is empty or
 *         inverted
 * @throws IOException
 *           propagated from expanding the message
 */
private RuleMatch createRuleMatch(final int[] tokenPositions,
    final AnalyzedTokenReadings[] tokens, final int firstMatchToken,
    final int lastMatchToken, final int matchingTokens) throws IOException {
  final String errMessage = formatMatches(tokens, tokenPositions,
      firstMatchToken, message);

  // Shift of the reported start, in tokens, relative to the first match.
  int startShift = 0;
  if (startPositionCorrection > 0) {
    int idx = 0;
    while (idx <= startPositionCorrection) {
      startShift += tokenPositions[idx];
      idx++;
    }
    startShift -= 1;
  }

  // Shift of the reported end, in tokens, relative to the last match.
  int endShift = 0;
  if (endPositionCorrection < 0) {
    for (int back = 0; back > endPositionCorrection; back--) {
      endShift -= tokenPositions[matchingTokens + back - 1];
    }
  }

  final int startIdx = firstMatchToken + startShift;
  final int endIdx = lastMatchToken + endShift;

  final AnalyzedTokenReadings startToken = tokens[startIdx];
  boolean startsWithUppercase = StringTools.startsWithUppercase(startToken
      .getToken());
  if (startsWithUppercase) {
    startsWithUppercase = !matchConvertsCase();
  }
  if (startToken.isSentStart() && startIdx + 1 < tokens.length) {
    // make uppercasing work also at sentence start:
    final AnalyzedTokenReadings following = tokens[startIdx + 1];
    startsWithUppercase = StringTools.startsWithUppercase(following
        .getToken());
  }

  int fromPos = startToken.getStartPos();
  // FIXME: this is fishy, assumes that comma should always come before
  // whitespace
  if (errMessage.contains(SUGG_TAG + ",") && startIdx >= 1) {
    final AnalyzedTokenReadings preceding = tokens[startIdx - 1];
    fromPos = preceding.getStartPos() + preceding.getToken().length();
  }

  final AnalyzedTokenReadings endToken = tokens[endIdx];
  final int toPos = endToken.getStartPos() + endToken.getToken().length();

  // fromPos >= toPos can happen with some skip="-1" when the last token is
  // not matched; in that case no rule match is created.
  return fromPos < toPos
      ? new RuleMatch(this, fromPos, toPos, errMessage, shortMessage,
          startsWithUppercase)
      : null;
}
/**
 * Checks if the suggestion starts with a match that is supposed to convert
 * case. If it does, stop the default conversion to uppercase.
 *
 * The answer is false when there are no suggestion matches, when the message
 * contains no suggestion tag, or when the character right after the first
 * suggestion tag is not a backslash (including the case that the tag ends
 * the message).
 *
 * @return true, if the match converts the case of the token.
 */
private boolean matchConvertsCase() {
  if (suggestionMatches == null || suggestionMatches.isEmpty()) {
    return false;
  }
  final int tagPos = message.indexOf(SUGG_TAG);
  if (tagPos < 0) {
    // Without this guard an unrelated character of the message was tested.
    return false;
  }
  final int sugStart = tagPos + SUGG_TAG.length();
  // The position test is the same for every match, so it is done once; the
  // bounds check prevents a StringIndexOutOfBoundsException.
  if (sugStart >= message.length() || message.charAt(sugStart) != '\\') {
    return false;
  }
  for (Match sMatch : suggestionMatches) {
    // Null entries are tolerated elsewhere in this class, so skip them here.
    if (sMatch != null && !sMatch.isInMessageOnly() && sMatch.convertsCase()) {
      return true;
    }
  }
  return false;
}
/**
 * Appends a suggestion match, creating the list on first use.
 *
 * @param m
 *          the match to append; stored as given
 */
public final void addSuggestionMatch(final Match m) {
  if (suggestionMatches == null) {
    suggestionMatches = new ArrayList<Match>();
  }
  suggestionMatches.add(m);
}
/**
 * Gets the index of the element indexed by i, adding any offsets because of
 * the phrases in the rule: the result is the sum of the first i entries of
 * {@code elementNo}. Positions beyond the end of that list count as 1, the
 * same length {@code phraseLen} reports for them, instead of raising an
 * IndexOutOfBoundsException.
 *
 * @param i
 *          Current element index.
 * @return int Index translated into XML element no; i itself when no phrase
 *         is used in the rule or when i is negative.
 */
private int translateElementNo(final int i) {
  if (!useList || i < 0) {
    return i;
  }
  final int known = elementNo.size();
  int j = 0;
  for (int k = 0; k < i; k++) {
    if (k < known) {
      j += elementNo.get(k);
    } else {
      j += 1;
    }
  }
  return j;
}
/**
 * Returns the number of pattern elements behind the XML element at the
 * given index: the stored phrase length, or 1 when the rule uses no phrases
 * or the index lies outside the stored list.
 *
 * @param i
 *          The index of the token.
 * @return the length of the phrase under the index, 1 otherwise.
 **/
private int phraseLen(final int i) {
  if (!useList || i < 0 || i >= elementNo.size()) {
    return 1;
  }
  return elementNo.get(i);
}
/**
 * Creates a Cartesian product of the arrays stored in the input array. Each
 * combination is joined into one string; between two parts the result of
 * {@code StringTools.addSpace} for the following part is inserted.
 *
 * @param input
 *          Array of string arrays to combine.
 * @param output
 *          Work array of strings; overwritten during the recursion.
 * @param r
 *          Starting parameter (use 0 to get all combinations).
 * @param lang
 *          Text language for adding spaces in some languages.
 * @return Combined array of @String.
 */
private static String[] combineLists(final String[][] input,
    final String[] output, final int r, final Language lang) {
  final List<String> outputList = new ArrayList<String>();
  if (r == input.length) {
    // One value has been chosen for every position: join them.
    final StringBuilder sb = new StringBuilder();
    for (int k = 0; k < output.length; k++) {
      sb.append(output[k]);
      if (k < output.length - 1) {
        sb.append(StringTools.addSpace(output[k + 1], lang));
      }
    }
    outputList.add(sb.toString());
  } else {
    // Try every alternative at position r and recurse for the rest.
    for (int c = 0; c < input[r].length; c++) {
      output[r] = input[r][c];
      final String[] sList = combineLists(input, output, r + 1, lang);
      outputList.addAll(Arrays.asList(sList));
    }
  }
  return outputList.toArray(new String[outputList.size()]);
}
/**
 * Concatenates the matches, and takes care of phrases (including inflection
 * using synthesis).
 *
 * @param start
 *          Position of the element as referenced by match element in the
 *          rule.
 * @param index
 *          The index of the element found in the matching sentence.
 * @param tokenIndex
 *          The position of the token in the AnalyzedTokenReadings array.
 * @param tokens
 *          Array of @AnalyzedTokenReadings
 * @param nextTokenPos
 *          Position used to compute the number of skipped tokens that is
 *          handed to the match (nextTokenPos minus the token position).
 * @return @String[] Array of concatenated strings, or null when no match is
 *         stored at the given start position
 * @throws IOException
 *           in case disk operations (used in synthesizer) go wrong.
 */
private String[] concatMatches(final int start, final int index,
    final int tokenIndex, final AnalyzedTokenReadings[] tokens,
    final int nextTokenPos)
    throws IOException {
  final Match match = suggestionMatches.get(start);
  if (match == null) {
    return null;
  }
  final int len = phraseLen(index);
  if (len == 1) {
    final int skippedTokens = nextTokenPos - tokenIndex;
    match.setToken(tokens, tokenIndex - 1, skippedTokens);
    match.setSynthesizer(language.getSynthesizer());
    return match.toFinalString();
  }
  // A phrase: collect the alternatives of every token of the phrase and
  // build all combinations of them.
  final List<String[]> matchList = new ArrayList<String[]>();
  for (int i = 0; i < len; i++) {
    final int skippedTokens = nextTokenPos - (tokenIndex + i);
    match.setToken(tokens, tokenIndex - 1 + i, skippedTokens);
    match.setSynthesizer(language.getSynthesizer());
    matchList.add(match.toFinalString());
  }
  return combineLists(matchList.toArray(new String[matchList.size()][]),
      new String[matchList.size()], 0, language);
}
/**
 * Replace back references generated with &lt;match&gt; and \\1 in message
 * using Match class, and take care of skipping.
 *
 * The message is processed from the left: as long as its first backslash is
 * followed by a positive digit, that reference is replaced and the search
 * starts again. A backslash at index 0 or one that is not followed by a
 * positive digit ends the processing.
 *
 * @param tokenReadings
 *          Array of AnalyzedTokenReadings that were matched against the
 *          pattern
 * @param positions
 *          Array of relative positions of matched tokens
 * @param firstMatchTok
 *          Position of the first matched token
 * @param errorMsg
 *          String containing suggestion markup
 * @return String Formatted message.
 * @throws IOException
 *           propagated from building the replacement strings
 **/
private String formatMatches(final AnalyzedTokenReadings[] tokenReadings,
    final int[] positions, final int firstMatchTok, final String errorMsg)
    throws IOException {
  String errorMessage = errorMsg;
  int matchCounter = 0;
  // Remembers, per referenced element, which suggestion match served it.
  final int[] numbersToMatches = new int[errorMsg.length()];
  boolean newWay = false;
  int errMarker = errorMessage.indexOf('\\');
  // NOTE(review): a reference at index 0 is never expanded (errMarker > 0);
  // kept as is - confirm whether that is intended.
  while (errMarker > 0 && errMarker < errorMessage.length() - 1
      && StringTools.isPositiveNumber(errorMessage.charAt(errMarker + 1))) {
    // Length of the reference: the backslash plus all digits following it.
    int numLen = 1;
    while (errMarker + numLen < errorMessage.length()
        && StringTools.isPositiveNumber(errorMessage.charAt(errMarker + numLen))) {
      numLen++;
    }
    final int j = Integer.parseInt(errorMessage.substring(errMarker + 1,
        errMarker + numLen)) - 1;
    int repTokenPos = 0;
    for (int l = 0; l <= j; l++) {
      repTokenPos += positions[l];
    }
    int nextTokenPos = 0;
    // positions[j + 1] is read, so j + 1 must be a valid index (the former
    // test "j <= positions.length" allowed an out-of-range read).
    if (j + 1 < positions.length) {
      nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1];
    }
    if (suggestionMatches != null) {
      if (matchCounter < suggestionMatches.size()) {
        numbersToMatches[j] = matchCounter;
        // NOTE(review): a null entry leaves the message untouched here; once
        // newWay is set nothing else replaces the reference - verify that
        // null entries cannot occur after a non-null one.
        if (suggestionMatches.get(matchCounter) != null) {
          final String[] matches = concatMatches(matchCounter, j,
              firstMatchTok + repTokenPos, tokenReadings, nextTokenPos);
          final String leftSide = errorMessage.substring(0, errMarker);
          final String rightSide = errorMessage.substring(errMarker + numLen);
          if (matches.length == 1) {
            errorMessage = leftSide + matches[0] + rightSide;
          } else {
            errorMessage = formatMultipleSynthesis(matches, leftSide,
                rightSide);
          }
          matchCounter++;
          newWay = true;
        }
      } else {
        // FIXME: is this correct? this is how we deal with multiple matches
        suggestionMatches.add(suggestionMatches.get(numbersToMatches[j]));
      }
    }
    if (!newWay) {
      // in case elements weren't used (yet)
      errorMessage = errorMessage.replace("\\" + (j + 1),
          tokenReadings[firstMatchTok + repTokenPos - 1].getToken());
    }
    errMarker = errorMessage.indexOf('\\');
  }
  return errorMessage;
}
/**
 * Expands one back reference that produced several alternative strings.
 * The text between the last suggestion start tag of the left side and the
 * reference, and the text between the reference and the first suggestion end
 * tag of the right side, is repeated around every alternative; alternatives
 * are separated by an end tag, a comma with a space, and a start tag when the
 * first end tag of the left side lies before its last start tag.
 *
 * @param matches
 *          the alternative replacement strings
 * @param leftSide
 *          message text before the back reference
 * @param rightSide
 *          message text after the back reference
 * @return the message with all alternatives inserted
 */
private static String formatMultipleSynthesis(final String[] matches,
    final String leftSide, final String rightSide) {
  final int lastLeftSugStart = leftSide.lastIndexOf(SUGG_TAG);
  // NOTE(review): this is the FIRST end tag although it is compared with the
  // LAST start tag; kept as is - confirm whether lastIndexOf was meant.
  final int lastLeftSugEnd = leftSide.indexOf(END_SUGG_TAG);

  // Text of the current suggestion that precedes the back reference. A tag at
  // index 0 is a valid hit (lastIndexOf returns -1 when absent); testing for
  // "> 0" dropped this text from the second and later alternatives.
  String suggestionLeft = "";
  if (lastLeftSugStart >= 0) {
    suggestionLeft = leftSide.substring(lastLeftSugStart + SUGG_TAG.length());
  }
  final String head;
  if (StringTools.isEmpty(suggestionLeft)) {
    head = leftSide;
  } else {
    head = leftSide.substring(0, lastLeftSugStart) + SUGG_TAG;
  }

  // Text of the current suggestion that follows the back reference.
  String suggestionRight = "";
  String rightSideNew = rightSide;
  final int rPos = rightSide.indexOf(END_SUGG_TAG);
  if (rPos > 0) {
    suggestionRight = rightSide.substring(0, rPos);
    rightSideNew = rightSide.substring(rPos);
  }

  final boolean separateSuggestions = lastLeftSugEnd < lastLeftSugStart;
  final StringBuilder sb = new StringBuilder();
  sb.append(head);
  for (int z = 0; z < matches.length; z++) {
    sb.append(suggestionLeft);
    sb.append(matches[z]);
    sb.append(suggestionRight);
    if (z < matches.length - 1 && separateSuggestions) {
      sb.append(END_SUGG_TAG);
      sb.append(", ");
      sb.append(SUGG_TAG);
    }
  }
  sb.append(rightSideNew);
  return sb.toString();
}
/**
 * For testing only.
 *
 * @return the pattern element list of this rule itself (not a copy)
 */
public final List getElements() {
  return this.patternElements;
}
}