Examples of LanguageIdentifier


Examples of com.stimulus.archiva.language.LanguageIdentifier

                logger.debug("detected language from the email header. {language='"+lang+"'}");
              } else {
                logger.debug("email did not contain language header field. analyzing text to determine language.");
               
                if (languageIdentifier==null)
                 languageIdentifier= new LanguageIdentifier();
               
                lang = languageIdentifier.identify(detectReader);
              }
            } catch (Exception e) {
              logger.debug("exception occurred while detecting indexing language.",e);
View Full Code Here

Examples of org.apache.nutch.analysis.lang.LanguageIdentifier

    public static final String LANGUAGE_TAG="TAG_LANGUAGE_";

    public LanguageDetectionModule(String name, Config globalConfig) {
        super(name, globalConfig);
        Configuration conf = NutchConfiguration.create();
        langIdentif = new LanguageIdentifier(conf);
    }
View Full Code Here

Examples of org.apache.nutch.analysis.lang.custom.LanguageIdentifier

   * Detect language from a String
   * @param text
   * @return detected language
   */
  public static String detectLanguage(String text) {
    LanguageIdentifier li = getLIInstance();

    return li.identify(text);
  }
View Full Code Here

Examples of org.apache.nutch.analysis.lang.custom.LanguageIdentifier

   * @param is
   * @return detected language
   * @throws IOException
   */
  public static String detectLanguage(InputStream is) throws IOException {
    LanguageIdentifier li = getLIInstance();
    return li.identify(is);
  }
View Full Code Here

Examples of org.apache.nutch.analysis.lang.custom.LanguageIdentifier

   * @param charset
   * @return detected language
   * @throws IOException
   */
  public static String detectLanguage(InputStream is, String charset) throws IOException {
    LanguageIdentifier li = getLIInstance();
    return li.identify(is, charset);
  }
View Full Code Here

Examples of org.apache.nutch.analysis.lang.custom.LanguageIdentifier

    return li.identify(is, charset);
  }

  private static LanguageIdentifier getLIInstance() {
    if (langID == null) {
      langID = new LanguageIdentifier();
    }
    return (langID);
  }
View Full Code Here

Examples of org.apache.tika.language.LanguageIdentifier

       
        result = getFirstLanguage(result);
       
        if (result == null) {
            // Language is still unspecified, so use ProfileHandler's result
            LanguageIdentifier langIdentifier = profilingHandler.getLanguage();
            // FUTURE KKr - provide config for specifying required certainty level.
            if (langIdentifier.isReasonablyCertain()) {
                result = langIdentifier.getLanguage();
                LOGGER.trace("Using language specified by profiling handler: " + result);
            } else {
                result = "";
            }
View Full Code Here

Examples of org.apache.tika.language.LanguageIdentifier

    String content = textHandler.toString();

    if (languageDetection) {
      String languageCode = bean.getString("languagecode");
      if (languageCode == null || languageCode.equals("")) {
        LanguageIdentifier identifier = new LanguageIdentifier(content);
        String lang = identifier.getLanguage();
        if (identifier.isReasonablyCertain() && (allowedLanguages == null || allowedLanguages.contains(lang))) {
          bean.set("languagecode", lang);
        }
      }
    }
View Full Code Here

Examples of org.apache.tika.language.LanguageIdentifier

  @Test
  public void testLanguageIndentifier() {
    try {
      long total = 0;
      LanguageIdentifier identifier;
      BufferedReader in = new BufferedReader(new InputStreamReader(this
          .getClass().getResourceAsStream("test-referencial.txt")));
      String line = null;
      while ((line = in.readLine()) != null) {
        String[] tokens = line.split(";");
        if (!tokens[0].equals("")) {
          StringBuilder content = new StringBuilder();
          // Test each line of the file...
          BufferedReader testFile = new BufferedReader(new InputStreamReader(
              this.getClass().getResourceAsStream(tokens[0]), "UTF-8"));
          String testLine = null, lang = null;
          while ((testLine = testFile.readLine()) != null) {
            content.append(testLine + "\n");
            testLine = testLine.trim();
            if (testLine.length() > 256) {
              identifier = new LanguageIdentifier(testLine);
              lang = identifier.getLanguage();
              Assert.assertEquals(tokens[1], lang);
            }
          }
          testFile.close();

          // Test the whole file
          long start = System.currentTimeMillis();
          System.out.println(content.toString());
          identifier = new LanguageIdentifier(content.toString());
          lang = identifier.getLanguage();
          System.out.println(lang);
          total += System.currentTimeMillis() - start;
          Assert.assertEquals(tokens[1], lang);
        }
      }
View Full Code Here

Examples of org.apache.tika.language.LanguageIdentifier

    final String text = StringTools.readFile(new FileInputStream(filename), encoding);
    return detectLanguageOfString(text);
  }

  private static Language detectLanguageOfString(final String text) {
    final LanguageIdentifier identifier = new LanguageIdentifier(text);
    final Language lang = Language.getLanguageForShortName(identifier.getLanguage());
    return lang;
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.