Package com.cybozu.labs.langdetect.util

Examples of com.cybozu.labs.langdetect.util.LangProfile


            if (file.getName().startsWith(".") || !file.isFile()) continue;
            FileInputStream is = null;
            try {
                LOGGER.debug("Loading language profile %s", file.toString());
                is = new FileInputStream(file);
                LangProfile profile = new LangProfile(new JSONObject(IOUtils.toString(is, "UTF-8")));
                try {
                    addProfile(profile, index, langsize);
                } catch (LangDetectException e) {
                    LOGGER.info("Duplicate language profile, %s, ", profile.name);
                }
View Full Code Here


     * @return Language profile instance
     * @throws LangDetectException
     */
    public static LangProfile generate(String lang, File textFile) throws LangDetectException {

        LangProfile profile = new LangProfile(lang);

        InputStream is = null;
        try {
            is = new BufferedInputStream(new FileInputStream(textFile));
            if (textFile.getName().endsWith(".gz")) is = new GZIPInputStream(is);

            BufferedReader reader = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));
            String line;
            while ((line = reader.readLine()) != null) {
                NGram ngram = new NGram();
              for (char c : line.toCharArray()) {
          ngram.addChar((char) c);
          for (int i = 1; i <= NGram.N_GRAM; i++) {
            profile.add(ngram.get(i));
          }
              }
      }
        } catch (IOException e) {
            throw new LangDetectException(ErrorCode.CantOpenTrainData, "Can't open training database file '" + textFile.getName() + "'");
View Full Code Here

        buffer.append(' ');
      }
      buffer.append(line);
    }
    String storedProfile = buffer.toString();
    LangProfile langProfile = new LangProfile();

    Matcher m = FREQ_PATTERN.matcher(storedProfile);
    if (m.find()) {
      String[] entries = m.group(1).split(",");
      for (String entry : entries) {
        String[] keyValue = entry.split(":");
        String label = keyValue[0].trim().replace("\"", "");
        langProfile.getFreq().put(label, Integer.valueOf(keyValue[1]));
      }
    }

    m = N_WORDS_PATTERN.matcher(storedProfile);
    if (m.find()) {
      String[] nWords = m.group(1).split(",");
      langProfile.setNWords(new int[nWords.length]);
      for (int i = 0; i < nWords.length; i++) {
        langProfile.getNWords()[i] = Integer.parseInt(nWords[i]);
      }
    }
   
    m = NAME_PATTERN.matcher(storedProfile);
    if (m.find()) {
      langProfile.setName(m.group(1));
    }

    return langProfile;
  }
View Full Code Here

    @Before
    public void setUp() throws Exception {
        DetectorFactory.clear();
       
        LangProfile profile_en = new LangProfile("en");
        for (String w : TRAINING_EN.split(" "))
            profile_en.add(w);
        DetectorFactory.addProfile(profile_en, 0, 3);

        LangProfile profile_fr = new LangProfile("fr");
        for (String w : TRAINING_FR.split(" "))
            profile_fr.add(w);
        DetectorFactory.addProfile(profile_fr, 1, 3);

        LangProfile profile_ja = new LangProfile("ja");
        for (String w : TRAINING_JA.split(" "))
            profile_ja.add(w);
        DetectorFactory.addProfile(profile_ja, 2, 3);
    }
View Full Code Here

        for (File file: listFiles) {
            if (file.getName().startsWith(".") || !file.isFile()) continue;
            InputStream is = null;
            try {
                is = new FileInputStream(file);
                LangProfile profile = LangProfileFactory.readProfile(is);
                addProfile(profile, index, langsize);
                ++index;
            } catch (IOException e) {
                throw new LangDetectException(ErrorCode.FileLoadError, "can't open '" + file.getName() + "'");
            } finally {
View Full Code Here

        in = classLoader.getResourceAsStream(languageFileName);
        if (in == null) {
          continue;
        }
        assert in.available() > 0;
                LangProfile profile = LangProfileFactory.readProfile(in);
                addProfile(profile, index, languages.length);
                ++index;
            } catch (IOException e) {
                throw new LangDetectException(ErrorCode.FileLoadError, "can't open '" + languageFileName + "'");
      } finally {
View Full Code Here

     * @return Language profile instance
     * @throws LangDetectException
     */
    public static LangProfile load(String lang, File file) throws LangDetectException {

        LangProfile profile = new LangProfile(lang);

        InputStream is = null;
        try {
            is = new BufferedInputStream(new FileInputStream(file));
            if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
View Full Code Here

            return;
        }

        ObjectOutputStream os = null;
        try {
            LangProfile profile = GenProfile.load(lang, file);
            profile.omitLessFreq();
            LangProfileFactory.writeProfile(profile, new FileOutputStream(new File(lang)));
        } catch (IOException e) {
            e.printStackTrace();
        } catch (LangDetectException e) {
            e.printStackTrace();
View Full Code Here

    checkProfileCopy("nl");
  }

  protected void checkProfileCopy(String language) throws FileNotFoundException, IOException {
    File originalFile = new File(PROFILE_DIR, language);
    final LangProfile originalProfile = readProfileFile(originalFile);
    File newFile = File.createTempFile("profile-copy-", null);
    FileOutputStream output = null;
    try {
      output = new FileOutputStream(newFile);
      LangProfileFactory.writeProfile(originalProfile, output);
      LangProfile newProfile = readProfileFile(newFile);
      assertThat(newProfile.getFreq().size(), is(equalTo(originalProfile.getFreq().size())));
      assertThat(newProfile.getFreq(), is(equalTo(originalProfile.getFreq())));
      assertThat(newProfile.getNWords(), is(equalTo(originalProfile.getNWords())));
      assertThat(newProfile.getName(), is(equalTo(originalProfile.getName())));
    } finally {
      IOUtils.closeQuietly(output);
            //noinspection ResultOfMethodCallIgnored
            newFile.delete();
    }
View Full Code Here

    }
  }

  private static void checkProfileFile(String language, int nWordSize, int freqSize) throws IOException {
    File profileFile = new File(PROFILE_DIR, language);
    final LangProfile langProfile = readProfileFile(profileFile);
    assertThat(langProfile, is(notNullValue()));
    assertThat(langProfile.getName(), is(equalTo(language)));
    assertThat(langProfile.getNWords(), is(notNullValue()));
    assertThat(langProfile.getNWords().length, is(equalTo(nWordSize)));
    assertThat(langProfile.getFreq(), is(notNullValue()));
    assertThat(langProfile.getFreq().size(), is(equalTo(freqSize)));
  }
View Full Code Here

TOP

Related Classes of com.cybozu.labs.langdetect.util.LangProfile

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.