Package edu.umd.cloud9.webgraph.data

Examples of edu.umd.cloud9.webgraph.data.AnchorText


    }
    return cnt;
  }

  private static HMapSFW array2Map(String[] array) {
    HMapSFW map = new HMapSFW();
    for ( int i = 0; i < array.length; i += 2 ) {
      map.put(array[i], Float.parseFloat(array[i+1]));
    }
    return map;
  }
View Full Code Here


    });
    grammar_AP.put(2, new String[] {
        "35", "0.1807","36", "0.0027","33", "0.0241","34", "0.0044","39", "0.3323","37", "0.3308","38", "0.0064","43", "0.0","42", "0.4085","41", "0.2136","40", "0.5166","67", "0.6253","66", "0.2055","69", "0.5464","68", "0.2556","26", "0.0082","27", "0.3113","28", "0.0058","29", "0.5319","30", "0.6832","32", "0.3168","31", "0.081","70", "0.5252","71", "0.219","72", "0.0278","73", "0.2601","74", "0.0302","75", "0.1449","59", "0.7421","58", "0.1165","57", "0.0905","56", "0.3992","55", "0.636","64", "6.0E-4","65", "0.5581","62", "0.0492","63", "0.0815","60", "0.7408","61", "0.0905","49", "0.7575","48", "0.7018","45", "0.3528","44", "0.1752","47", "0.3373","46", "0.3308","51", "0.0628","52", "0.0749","53", "0.0038","54", "0.0415","50", "0.7537"
    });

    HMapSFW tenbestAPMap = array2Map(Nbest_AP.get(2));
    HMapSFW onebestAPMap = array2Map(Onebest_AP.get(2));
    HMapSFW grammarAPMap = array2Map(grammar_AP.get(2));
    HMapSFW tokenAPMap = array2Map(baseline_token_AP);
    HMapSFW gridAPMap = array2Map(Interp_AP.get(2));

    System.out.println(countNumberOfImprovedTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, grammarAPMap));
View Full Code Here

    }
    return cnt;
  }

  private static HMapSFW array2Map(String[] array) {
    HMapSFW map = new HMapSFW();
    for ( int i = 0; i < array.length; i += 2 ) {
      map.put(array[i], Float.parseFloat(array[i+1]));
    }
    return map;
  }
View Full Code Here

    return new JUnit4TestAdapter(EnFr_CLEF06.class);
  }

  public static void main(String[] args) {
    initialize();
    HMapSFW tenbestAPMap = array2Map(Nbest_AP.get(2));
    HMapSFW onebestAPMap = array2Map(Onebest_AP.get(2));
    HMapSFW grammarAPMap = array2Map(grammar_AP.get(2));
    HMapSFW tokenAPMap = array2Map(baseline_token_AP);
    HMapSFW gridAPMap = array2Map(Interp_AP.get(2));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, grammarAPMap));
    System.out.println(countNumberOfNegligibleTopics(tokenAPMap, gridAPMap));
View Full Code Here

    }
    return cnt;
  }

  private static HMapSFW array2Map(String[] array) {
    HMapSFW map = new HMapSFW();
    for ( int i = 0; i < array.length; i += 2 ) {
      map.put(array[i], Float.parseFloat(array[i+1]));
    }
    return map;
  }
View Full Code Here

    return new JUnit4TestAdapter(EnZh_NTCIR8.class);
  }

  public static void main(String[] args) {
    //    HMapSFW gridAPMap = array2Map(Interp_AP);
    HMapSFW tenbestAPMap = array2Map(Nbest_AP.get(2));
    HMapSFW onebestAPMap = array2Map(Onebest_AP.get(1));
    HMapSFW grammarAPMap = array2Map(grammar_AP.get(0));
    HMapSFW tokenAPMap = array2Map(baseline_token_AP);
    //    System.out.println(countNumberOfImprovedTopics(tokenAPMap, gridAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tenbestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, onebestAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, grammarAPMap));
    System.out.println(countNumberOfImprovedTopics(tokenAPMap, tokenAPMap));
View Full Code Here

    }
    return cnt;
  }

  private static HMapSFW array2Map(String[] array) {
    HMapSFW map = new HMapSFW();
    for ( int i = 0; i < array.length; i += 2 ) {
      map.put(array[i], Float.parseFloat(array[i+1]));
    }
    return map;
  }
View Full Code Here

    transPhrase = transPhrase.trim();

    //LOG.info("Found translation phrase " + transPhrase);

    if (!phrase2score.containsKey(fPhrase)) {
      phrase2score.put(fPhrase, new HMapSFW());
    }
    // if same phrase extracted from multiple rules, average prob.s

    HMapKF<String> scoreTable = phrase2score.get(fPhrase);
View Full Code Here

    // scfgDist table is a set of (source_token --> X) maps, where X is a set of (token_trans --> score) maps
    Map<String,HMapSFW> scfgDist = new HashMap<String,HMapSFW>();

    // phrase2count table is a set of (source_phrase --> X) maps, where X is a set of (phrase_trans --> count) maps
    HMapSFW phraseDist = new HMapSFW();

    HMapSIW srcTokenCnt = new HMapSIW();

    Set<String> bagOfTargetTokens = new HashSet<String>();
View Full Code Here

          continue;     
        }
        bagOfTargetTokens.add(eTerm);
        if (isOne2Many <= 1) {
          if (probDist.containsKey(fTerm)) {
            HMapSFW eToken2Prob = probDist.get(fTerm);
            eToken2Prob.increment(eTerm, weight);
          }else {
            HMapSFW eToken2Prob = new HMapSFW();
            eToken2Prob.put(eTerm, weight);
            probDist.put(fTerm, eToken2Prob);
          }
        }
      }

      if (isOne2Many == 2) {
        // if ids.size() > 1 eTerm is a multi-token expression
        // even if eTerm is overwritten here, we need to do above loop to update bagOfTargetTokens
        if (ids.size() > 1) {
          eTerm = isConsecutiveWithStopwords(ids, rhs, docLangTokenizer);     // <---- heuristic
        }

        // no proper translation on target-side (e.g., stopword OR non-consecutive multi-word translation), let's skip
        if (eTerm == null) {
          continue;
        }

        eTerm = Utils.removeBorderStopWords(docLangTokenizer, eTerm);
       
        // this is difference between one-to-many and one-to-one heuristics for 1-best MT case
        // we add multi-token expressions in addition to single target tokens,
        bagOfTargetTokens.add(eTerm);

        // update prob. distr.
        if (probDist.containsKey(fTerm)) {
          HMapSFW eToken2Prob = probDist.get(fTerm);
          eToken2Prob.increment(eTerm, weight);
        }else {
          HMapSFW eToken2Prob = new HMapSFW();
          eToken2Prob.put(eTerm, weight);
          probDist.put(fTerm, eToken2Prob);
        }
      }
    }
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.webgraph.data.AnchorText

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.