Package org.olat.core.commons.services.text.impl.nutch.NGramProfile

Examples of org.olat.core.commons.services.text.impl.nutch.NGramProfile.NGramEntry


            profile.load(is);
            languages.add(profile);
            supportedLanguages.add(lang);
            List<NGramEntry> ngrams = profile.getSorted();
            for (int i=0; i<ngrams.size(); i++) {
                NGramEntry entry = ngrams.get(i);
                List<NGramEntry> registered = tmpIdx.get(entry);
                if (registered == null) {
                    registered = new ArrayList<NGramEntry>();
                    tmpIdx.put(entry, registered);
                }
                registered.add(entry);
                entry.setProfile(profile);
            }
            list.append(" " + lang + "(" + ngrams.size() + ")");
            is.close();
          } catch (IOException e1) {
            log.error("", e1);
          }
        }
      }
      // transform all ngrams lists to arrays for performances
      Iterator<NGramEntry> keys = tmpIdx.keySet().iterator();
      while (keys.hasNext()) {
        NGramEntry entry = keys.next();
        List<NGramEntry> l = tmpIdx.get(entry);
        if (l != null) {
          NGramEntry[] array = l.toArray(new NGramEntry[l.size()]);
          ngramsIdx.put(entry.getSeq(), array);
        }
      }
      log.info(list.toString());
      // Create the suspect profile
      suspect = new NGramProfile("suspect", minLength, maxLength);
View Full Code Here


    suspect.analyze(text);
    Iterator<NGramEntry> iter = suspect.getSorted().iterator();
    float topscore = Float.MIN_VALUE;
    String lang = "";
    HashMap<NGramProfile, Float> scores = new HashMap<NGramProfile, Float>();
    NGramEntry searched = null;
   
    while (iter.hasNext()) {
        searched = iter.next();
        NGramEntry[] ngrams = ngramsIdx.get(searched.getSeq());
        if (ngrams != null) {
            for (int j=0; j<ngrams.length; j++) {
                NGramProfile profile = ngrams[j].getProfile();
                Float pScore = scores.get(profile);
                if (pScore == null) {
                    pScore = new Float(0);
                }
                float plScore = pScore.floatValue();
                plScore += ngrams[j].getFrequency() + searched.getFrequency();
                scores.put(profile, new Float(plScore));
                if (plScore > topscore) {
                    topscore = plScore;
                    lang = profile.getName();
                }
View Full Code Here

TOP

Related Classes of org.olat.core.commons.services.text.impl.nutch.NGramProfile.NGramEntry

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.