/* LanguageTool, a natural language style checker
* Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.patterns;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang.ObjectUtils;
import org.languagetool.Language;
import org.languagetool.rules.Category;
import org.languagetool.rules.ITSIssueType;
import org.languagetool.rules.IncorrectExample;
import org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
public class PatternRuleHandler extends XMLRuleHandler {
public static final String TYPE = "type";
static final String MARKER_TAG = "<marker>";
static final String PLEASE_SPELL_ME = "<pleasespellme/>";
protected Category category;
protected String categoryIssueType;
protected String ruleGroupIssueType;
protected String ruleIssueType;
protected String name;
protected String filterClassName;
protected String filterArgs;
private int subId;
private boolean defaultOff;
private boolean defaultOn;
private String ruleGroupDescription;
private int startPos = -1;
private int endPos = -1;
private int tokenCountForMarker = 0;
private int antiPatternCounter = 0;
private boolean inRule;
private List<DisambiguationPatternRule> rulegroupAntiPatterns;
private List<DisambiguationPatternRule> ruleAntiPatterns;
private boolean relaxedMode = false;
private boolean inAntiPattern;
/**
* If set to true, don't throw an exception if id or name is not set.
* Used for online rule editor.
* @since 2.1
*/
void setRelaxedMode(boolean relaxedMode) {
this.relaxedMode = relaxedMode;
}
// ===========================================================
// SAX DocumentHandler methods
// ===========================================================
@Override
public void startElement(final String namespaceURI, final String lName,
final String qName, final Attributes attrs) throws SAXException {
switch (qName) {
case "category":
final String catName = attrs.getValue(NAME);
final String priorityStr = attrs.getValue("priority");
if (priorityStr == null) {
category = new Category(catName);
} else {
category = new Category(catName, Integer.parseInt(priorityStr));
}
if ("off".equals(attrs.getValue(DEFAULT))) {
category.setDefaultOff();
}
if (attrs.getValue(TYPE) != null) {
categoryIssueType = attrs.getValue(TYPE);
}
break;
case "rules":
final String languageStr = attrs.getValue("lang");
language = Language.getLanguageForShortName(languageStr);
break;
case RULE:
inRule = true;
shortMessage = new StringBuilder();
message = new StringBuilder();
suggestionsOutMsg = new StringBuilder();
url = new StringBuilder();
id = attrs.getValue(ID);
name = attrs.getValue(NAME);
if (inRuleGroup) {
subId++;
if (id == null) {
id = ruleGroupId;
}
if (name == null) {
name = ruleGroupDescription;
}
}
if (!(inRuleGroup && defaultOff)) {
defaultOff = "off".equals(attrs.getValue(DEFAULT));
}
if (!(inRuleGroup && defaultOn)) {
defaultOn = "on".equals(attrs.getValue(DEFAULT));
}
correctExamples = new ArrayList<>();
incorrectExamples = new ArrayList<>();
if (suggestionMatches != null) {
suggestionMatches.clear();
}
if (suggestionMatchesOutMsg != null) {
suggestionMatchesOutMsg.clear();
}
if (attrs.getValue(TYPE) != null) {
ruleIssueType = attrs.getValue(TYPE);
}
break;
case PATTERN:
startPattern(attrs);
tokenCountForMarker = 0;
break;
case ANTIPATTERN:
inAntiPattern = true;
antiPatternCounter++;
caseSensitive = YES.equals(attrs.getValue(CASE_SENSITIVE));
tokenCounter = 0;
tokenCountForMarker = 0;
break;
case AND:
inAndGroup = true;
tokenCountForMarker++;
break;
case OR:
inOrGroup = true;
tokenCountForMarker++;
break;
case UNIFY:
inUnification = true;
uniNegation = YES.equals(attrs.getValue(NEGATE));
break;
case UNIFY_IGNORE:
inUnificationNeutral = true;
break;
case FEATURE:
uFeature = attrs.getValue(ID);
break;
case TYPE:
uType = attrs.getValue(ID);
uTypeList.add(uType);
break;
case TOKEN:
setToken(attrs);
if (!inAndGroup && !inOrGroup) {
tokenCountForMarker++;
}
break;
case EXCEPTION:
setExceptions(attrs);
break;
case EXAMPLE:
if (attrs.getValue(TYPE).equals("correct")) {
inCorrectExample = true;
correctExample = new StringBuilder();
} else if (attrs.getValue(TYPE).equals("incorrect")) {
inIncorrectExample = true;
incorrectExample = new StringBuilder();
exampleCorrection = new StringBuilder();
if (attrs.getValue("correction") != null) {
exampleCorrection.append(attrs.getValue("correction"));
}
}
break;
case "filter":
filterClassName = attrs.getValue("class");
filterArgs = attrs.getValue("args");
break;
case MESSAGE:
inMessage = true;
inSuggestion = false;
message = new StringBuilder();
break;
case SUGGESTION:
if (YES.equals(attrs.getValue("suppress_misspelled"))) {
message.append(PLEASE_SPELL_ME);
}
if (inMessage) {
message.append("<suggestion>");
} else { //suggestions outside message
suggestionsOutMsg.append("<suggestion>");
}
inSuggestion = true;
break;
case "short":
if (inRule) {
inShortMessage = true;
shortMessage = new StringBuilder();
} else {
inShortMessageForRuleGroup = true;
shortMessageForRuleGroup = new StringBuilder();
}
break;
case "url":
if (inRule) {
inUrl = true;
url = new StringBuilder();
} else {
inUrlForRuleGroup = true;
urlForRuleGroup = new StringBuilder();
}
break;
case RULEGROUP:
ruleGroupId = attrs.getValue(ID);
ruleGroupDescription = attrs.getValue(NAME);
defaultOff = "off".equals(attrs.getValue(DEFAULT));
defaultOn = "on".equals(attrs.getValue(DEFAULT));
urlForRuleGroup = new StringBuilder();
shortMessageForRuleGroup = new StringBuilder();
inRuleGroup = true;
subId = 0;
if (attrs.getValue(TYPE) != null) {
ruleGroupIssueType = attrs.getValue(TYPE);
}
break;
case MATCH:
setMatchElement(attrs);
break;
case MARKER:
if (inIncorrectExample) {
incorrectExample.append(MARKER_TAG);
} else if (inCorrectExample) {
correctExample.append(MARKER_TAG);
} else if (inPattern || inAntiPattern) {
startPos = tokenCounter;
inMarker = true;
}
break;
case UNIFICATION:
uFeature = attrs.getValue("feature");
inUnificationDef = true;
break;
case "equivalence":
uType = attrs.getValue(TYPE);
break;
case PHRASES:
inPhrases = true;
break;
case "includephrases":
phraseElementInit();
break;
case "phrase":
if (inPhrases) {
phraseId = attrs.getValue(ID);
}
break;
case "phraseref":
if (attrs.getValue("idref") != null) {
preparePhrase(attrs);
tokenCountForMarker++;
}
break;
}
}
@Override
public void endElement(final String namespaceURI, final String sName,
final String qName) throws SAXException {
switch (qName) {
case "category":
categoryIssueType = null;
break;
case RULE:
suggestionMatchesOutMsg = addLegacyMatches(suggestionMatchesOutMsg, suggestionsOutMsg.toString(), false);
phraseElementInit();
if (relaxedMode && id == null) {
id = "";
}
if (relaxedMode && name == null) {
name = "";
}
if (phraseElementList.isEmpty()) {
// Elements contain information whether they are inside a <marker>...</marker>,
// but for phraserefs this depends on the position where the phraseref is used
// not where it's defined. Thus we have to copy the elements so each use of
// the phraseref can carry their own information:
final List<Element> tmpElements = new ArrayList<>();
createRules(new ArrayList<>(elementList), tmpElements, 0);
} else {
if (!elementList.isEmpty()) {
for (List<Element> ph : phraseElementList) {
ph.addAll(new ArrayList<>(elementList));
}
}
for (List<Element> phraseElement : phraseElementList) {
processElement(phraseElement);
final List<Element> tmpElements = new ArrayList<>();
createRules(phraseElement, tmpElements, 0);
}
}
elementList.clear();
if (phraseElementList != null) {
phraseElementList.clear();
}
ruleIssueType = null;
inRule = false;
filterClassName = null;
filterArgs = null;
break;
case EXCEPTION:
finalizeExceptions();
break;
case AND:
inAndGroup = false;
andGroupCounter = 0;
tokenCounter++;
break;
case OR:
inOrGroup = false;
orGroupCounter = 0;
tokenCounter++;
break;
case TOKEN:
finalizeTokens();
break;
case PATTERN:
inPattern = false;
if (lastPhrase) {
elementList.clear();
}
tokenCounter = 0;
break;
case ANTIPATTERN:
String antiId = id;
if (inRuleGroup) {
if (subId > 0) {
antiId = ruleGroupId + "[" + subId + "]";
} else {
antiId = ruleGroupId;
}
}
final DisambiguationPatternRule rule = new DisambiguationPatternRule(
antiId + "_antipattern:" + antiPatternCounter,
"antipattern", language, elementList, null, null,
DisambiguationPatternRule.DisambiguatorAction.IMMUNIZE);
if (startPos != -1 && endPos != -1) {
rule.setStartPositionCorrection(startPos);
rule.setEndPositionCorrection(endPos - tokenCountForMarker);
} else {
// No '<marker>'? Then add artificial <marker>s around all tokens to work
// around issue https://github.com/languagetool-org/languagetool/issues/189:
for (Element element : elementList) {
element.setInsideMarker(true);
}
}
elementList.clear();
if (inRule) {
if (ruleAntiPatterns == null) {
ruleAntiPatterns = new ArrayList<>();
}
ruleAntiPatterns.add(rule);
} else { // a rulegroup shares all antipatterns not included in a single rule
if (rulegroupAntiPatterns == null) {
rulegroupAntiPatterns = new ArrayList<>();
}
rulegroupAntiPatterns.add(rule);
}
tokenCounter = 0;
inAntiPattern = false;
break;
case EXAMPLE:
if (inCorrectExample) {
correctExamples.add(correctExample.toString());
} else if (inIncorrectExample) {
final IncorrectExample example;
final String[] corrections = exampleCorrection.toString().split("\\|");
if (corrections.length > 0 && corrections[0].length() > 0) {
example = new IncorrectExample(incorrectExample.toString(), corrections);
} else {
example = new IncorrectExample(incorrectExample.toString());
}
incorrectExamples.add(example);
}
inCorrectExample = false;
inIncorrectExample = false;
correctExample = new StringBuilder();
incorrectExample = new StringBuilder();
exampleCorrection = new StringBuilder();
break;
case MESSAGE:
suggestionMatches = addLegacyMatches(suggestionMatches, message.toString(), true);
inMessage = false;
break;
case SUGGESTION:
if (inMessage) {
message.append("</suggestion>");
} else { //suggestion outside message
suggestionsOutMsg.append("</suggestion>");
}
inSuggestion = false;
break;
case "short":
inShortMessage = false;
inShortMessageForRuleGroup = false;
break;
case "url":
inUrl = false;
inUrlForRuleGroup = false;
break;
case MATCH:
if (inMessage) {
suggestionMatches.get(suggestionMatches.size() - 1).
setLemmaString(match.toString());
} else if (inSuggestion) {
suggestionMatchesOutMsg.get(suggestionMatchesOutMsg.size() - 1).
setLemmaString(match.toString());
} else if (inToken) {
tokenReference.setLemmaString(match.toString());
}
inMatch = false;
break;
case RULEGROUP:
urlForRuleGroup = new StringBuilder();
shortMessageForRuleGroup = new StringBuilder();
inRuleGroup = false;
ruleGroupIssueType = null;
if (rulegroupAntiPatterns != null) {
rulegroupAntiPatterns.clear();
}
antiPatternCounter = 0;
break;
case MARKER:
if (inCorrectExample) {
correctExample.append("</marker>");
} else if (inIncorrectExample) {
incorrectExample.append("</marker>");
} else if (inPattern || inAntiPattern) {
endPos = tokenCountForMarker;
inMarker = false;
}
break;
case "phrase":
if (inPhrases) {
finalizePhrase();
}
break;
case "includephrases":
elementList.clear();
break;
case PHRASES:
if (inPhrases) {
inPhrases = false;
}
break;
case UNIFICATION:
inUnificationDef = false;
break;
case FEATURE:
equivalenceFeatures.put(uFeature, uTypeList);
uTypeList = new ArrayList<>();
break;
case UNIFY:
inUnification = false;
//clear the features...
equivalenceFeatures = new HashMap<>();
//set negation on the last token only!
final int lastElement = elementList.size() - 1;
elementList.get(lastElement).setLastInUnification();
if (uniNegation) {
elementList.get(lastElement).setUniNegation();
}
break;
case UNIFY_IGNORE:
inUnificationNeutral = false;
break;
}
}
/**
* Create rule from an Element list.
* In case of OR groups, several rules are created recursively.
* @since 2.3
*
* @param elemList The complete original Element list
* @param tmpElements Temporary Element list being created
* @param numElement Index of elemList being analyzed
*/
private void createRules(List<Element> elemList,
List<Element> tmpElements, int numElement) {
String shortMessage = "";
if (this.shortMessage != null && this.shortMessage.length() > 0) {
shortMessage = this.shortMessage.toString();
} else if (shortMessageForRuleGroup != null && shortMessageForRuleGroup.length() > 0) {
shortMessage = this.shortMessageForRuleGroup.toString();
}
if (numElement >= elemList.size()) {
final PatternRule rule = new PatternRule(id, language, tmpElements, name,
message.toString(), shortMessage,
suggestionsOutMsg.toString(), phraseElementList.size() > 1);
if (filterClassName != null && filterArgs != null) {
RuleFilterCreator creator = new RuleFilterCreator();
RuleFilter filter = creator.getFilter(filterClassName);
rule.setFilter(filter);
rule.setFilterArguments(filterArgs);
}
prepareRule(rule);
rules.add(rule);
} else {
Element element = elemList.get(numElement);
if (element.hasOrGroup()) {
for (Element elementOfOrGroup : element.getOrGroup()) {
final List<Element> tmpElements2 = new ArrayList<>();
tmpElements2.addAll(tmpElements);
tmpElements2.add((Element) ObjectUtils.clone(elementOfOrGroup));
createRules(elemList, tmpElements2, numElement + 1);
}
}
tmpElements.add((Element) ObjectUtils.clone(element));
createRules(elemList, tmpElements, numElement + 1);
}
}
protected void prepareRule(final PatternRule rule) {
if (startPos != -1 && endPos != -1) {
rule.setStartPositionCorrection(startPos);
rule.setEndPositionCorrection(endPos - tokenCountForMarker);
}
startPos = -1;
endPos = -1;
rule.setCorrectExamples(correctExamples);
rule.setIncorrectExamples(incorrectExamples);
rule.setCategory(category);
if (rulegroupAntiPatterns != null && !rulegroupAntiPatterns.isEmpty()) {
rule.setAntiPatterns(rulegroupAntiPatterns);
}
if (ruleAntiPatterns != null && !ruleAntiPatterns.isEmpty()) {
rule.setAntiPatterns(ruleAntiPatterns);
ruleAntiPatterns.clear();
}
if (inRuleGroup) {
rule.setSubId(Integer.toString(subId));
} else {
rule.setSubId("1");
}
caseSensitive = false;
if (suggestionMatches != null) {
for (final Match m : suggestionMatches) {
rule.addSuggestionMatch(m);
}
if (phraseElementList.size() <= 1) {
suggestionMatches.clear();
}
}
if (suggestionMatchesOutMsg != null) {
for (final Match m : suggestionMatchesOutMsg) {
rule.addSuggestionMatchOutMsg(m);
}
suggestionMatchesOutMsg.clear();
}
if (defaultOff) {
rule.setDefaultOff();
}
if (category == null) {
throw new RuntimeException("Cannot activate rule '" + id + "', it is outside of a <category>...</category>");
}
if (category.isDefaultOff() && !defaultOn) {
rule.setDefaultOff();
}
if (url != null && url.length() > 0) {
try {
rule.setUrl(new URL(url.toString()));
} catch (MalformedURLException e) {
throw new RuntimeException("Could not parse URL for rule: " + rule + ": '" + url + "'", e);
}
} else if (urlForRuleGroup != null && urlForRuleGroup.length() > 0) {
try {
rule.setUrl(new URL(urlForRuleGroup.toString()));
} catch (MalformedURLException e) {
throw new RuntimeException("Could not parse URL for rule: " + rule + ": '" + urlForRuleGroup + "'", e);
}
}
// inheritance of values - if no type value is defined for a rule, take the rule group's value etc:
if (ruleIssueType != null) {
rule.setLocQualityIssueType(ITSIssueType.getIssueType(ruleIssueType));
} else if (ruleGroupIssueType != null) {
rule.setLocQualityIssueType(ITSIssueType.getIssueType(ruleGroupIssueType));
} else if (categoryIssueType != null) {
rule.setLocQualityIssueType(ITSIssueType.getIssueType(categoryIssueType));
}
}
@Override
public void characters(final char[] buf, final int offset, final int len) {
final String s = new String(buf, offset, len);
if (inException) {
exceptions.append(s);
} else if (inToken) {
elements.append(s);
} else if (inCorrectExample) {
correctExample.append(s);
} else if (inIncorrectExample) {
incorrectExample.append(s);
} else if (inMatch) {
match.append(s);
} else if (inMessage) {
message.append(s);
} else if (inSuggestion) { //Suggestion outside message
suggestionsOutMsg.append(s);
} else if (inShortMessage) {
shortMessage.append(s);
} else if (inShortMessageForRuleGroup) {
shortMessageForRuleGroup.append(s);
} else if (inUrl) {
url.append(s);
} else if (inUrlForRuleGroup) {
urlForRuleGroup.append(s);
}
}
}