Package org.languagetool.rules

Examples of org.languagetool.rules.ConfusionSetLoader


  /**
   * Creates a rule creator and stores the given value in the {@code minErrorProb} field.
   *
   * @param minErrorProb value kept on this instance for later use; presumably the minimum
   *                     error probability a candidate must reach — TODO confirm against usage
   */
  public RuleCreator(float minErrorProb) {
    this.minErrorProb = minErrorProb;
  }

  private void run(File homophoneOccurrences, String homophonePath) throws IOException {
    ConfusionSetLoader confusionSetLoader = new ConfusionSetLoader();
    InputStream inputStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(homophonePath);
    Map<String,ConfusionProbabilityRule.ConfusionSet> confusionSetMap = confusionSetLoader.loadConfusionSet(inputStream);
    initMaps(homophoneOccurrences);
    int groupCount = 0;
    if (XML_MODE) {
      System.out.println("<rules lang='en'>\n");
      System.out.println("<category name='Auto-generated rules'>\n");
View Full Code Here


    return result;
  }

  private void run(String homophonePath) throws IOException {
    System.err.println("Loading homophones from " + homophonePath + ", minimum occurrence: " + MIN_COUNT);
    ConfusionSetLoader confusionSetLoader = new ConfusionSetLoader();
    InputStream inputStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(homophonePath);
    Map<String,ConfusionProbabilityRule.ConfusionSet> map = confusionSetLoader.loadConfusionSet(inputStream);
    Set<String> confusionTerms = map.keySet();
    dumpOccurrences(confusionTerms);
  }
View Full Code Here

* @since 2.7
*/
final class ConfusionSetUrlGenerator {

  public static void main(String[] args) throws IOException {
    ConfusionSetLoader confusionSetLoader =  new ConfusionSetLoader();
    InputStream inputStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream("/en/homophones.txt");
    Map<String,ConfusionProbabilityRule.ConfusionSet> map = confusionSetLoader.loadConfusionSet(inputStream);
    String url = "http://storage.googleapis.com/books/ngrams/books/googlebooks-eng-all-2gram-20120701-<XX>.gz";
    Set<String> nameSet = new HashSet<>();
    for (String s : map.keySet()) {
      if (s.length() < 2) {
        nameSet.add(s.substring(0, 1).toLowerCase() + "_");
View Full Code Here

    System.out.println(" (Lookups is the number of lookups needed to see which word in the homophones set " +
            "is more common. Actually even more ngram lookups will be needed, depending on what ngrams we have.)");
  }

  private Map<String, ConfusionProbabilityRule.ConfusionSet> getConfusionSet() throws IOException {
    ConfusionSetLoader loader = new ConfusionSetLoader();
    InputStream homophoneStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream("/en/homophones.txt");
    return loader.loadConfusionSet(homophoneStream);
  }
View Full Code Here

  private int globalSentenceCount;
  private int globalRuleMatches;

  RealWordFalseAlarmEvaluator(File languageModelIndexDir) throws IOException {
    InputStream inputStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream("/en/homophones.txt");
    ConfusionSetLoader confusionSetLoader;
    if (EVAL_MODE) {
      InputStream infoStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream("/en/homophones-info.txt");
      confusionSetLoader =  new ConfusionSetLoader(infoStream, MIN_SENTENCES, MAX_ERROR_RATE);
    } else {
      confusionSetLoader =  new ConfusionSetLoader();
    }
    confusionSet = confusionSetLoader.loadConfusionSet(inputStream);
    langTool = new JLanguageTool(new BritishEnglish());
    //langTool.activateDefaultPatternRules();
    List<Rule> rules = langTool.getAllActiveRules();
    for (Rule rule : rules) {
      langTool.disableRule(rule.getId());
View Full Code Here

TOP

Related Classes of org.languagetool.rules.ConfusionSetLoader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.