Examples of LangProfile


Examples of com.cybozu.labs.langdetect.util.LangProfile

    assertThat(langProfile.getFreq().size(), is(equalTo(freqSize)));
  }

  private static LangProfile readProfileFile(File profileFile) throws FileNotFoundException, IOException {
    FileInputStream input = null;
    final LangProfile langProfile;
    try {
      input = new FileInputStream(profileFile);
      langProfile = LangProfileFactory.readProfile(input);
    } finally {
      IOUtils.closeQuietly(input);
View Full Code Here

Examples of com.cybozu.labs.langdetect.util.LangProfile

        writer.println("Oui ça va. Et toi ?");
      } finally {
        IOUtils.closeQuietly(writer);
      }
     
      LangProfile trucProfile = GenProfile.generate("truc", inputFile);
      HashMap<String, Integer> freqs = trucProfile.getFreq();
      assertThat(freqs, is(notNullValue()));
      assertThat(freqs.get("t"), is(equalTo(8)));
      assertThat(freqs.get("to"), is(equalTo(4)));
      assertThat(freqs.get("out"), is(equalTo(2)));
      assertThat(freqs.get("o"), is(equalTo(7)));
View Full Code Here

Examples of com.cybozu.labs.langdetect.util.LangProfile

        for (File file: listFiles) {
            if (file.getName().startsWith(".") || !file.isFile()) continue;
            FileInputStream is = null;
            try {
                is = new FileInputStream(file);
                LangProfile profile = JSON.decode(is, LangProfile.class);
                addProfile(profile, index, langsize);
                ++index;
            } catch (JSONException e) {
                throw new LangDetectException(ErrorCode.FormatError, "profile format error in '" + file.getName() + "'");
            } catch (IOException e) {
View Full Code Here

Examples of com.cybozu.labs.langdetect.util.LangProfile

        if (langsize < 2)
            throw new LangDetectException(ErrorCode.NeedLoadProfileError, "Need more than 2 profiles");
           
        for (String json: json_profiles) {
            try {
                LangProfile profile = JSON.decode(json, LangProfile.class);
                addProfile(profile, index, langsize);
                ++index;
            } catch (JSONException e) {
                throw new LangDetectException(ErrorCode.FormatError, "profile format error");
            }
View Full Code Here

Examples of com.cybozu.labs.langdetect.util.LangProfile

     * @return Language profile instance
     * @throws LangDetectException
     */
    public static LangProfile loadFromWikipediaAbstract(String lang, File file) throws LangDetectException {

        LangProfile profile = new LangProfile(lang);

        BufferedReader br = null;
        try {
            InputStream is = new FileInputStream(file);
            if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
            br = new BufferedReader(new InputStreamReader(is, "utf-8"));

            TagExtractor tagextractor = new TagExtractor("abstract", 100);

            XMLStreamReader reader = null;
            try {
                XMLInputFactory factory = XMLInputFactory.newInstance();
                reader = factory.createXMLStreamReader(br);
                while (reader.hasNext()) {
                    switch (reader.next()) {
                    case XMLStreamReader.START_ELEMENT:
                        tagextractor.setTag(reader.getName().toString());
                        break;
                    case XMLStreamReader.CHARACTERS:
                        tagextractor.add(reader.getText());
                        break;
                    case XMLStreamReader.END_ELEMENT:
                        String text = tagextractor.closeTag();
                        if (text != null) profile.update(text);
                        break;
                    }
                }
            } catch (XMLStreamException e) {
                throw new LangDetectException(ErrorCode.TrainDataFormatError, "Training database file '" + file.getName() + "' is an invalid XML.");
View Full Code Here

Examples of com.cybozu.labs.langdetect.util.LangProfile

     * @return Language profile instance
     * @throws LangDetectException
     */
    public static LangProfile loadFromText(String lang, File file) throws LangDetectException {

        LangProfile profile = new LangProfile(lang);

        BufferedReader is = null;
        try {
            is = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));

            int count = 0;
            while (is.ready()) {
                String line = is.readLine();
                profile.update(line);
                ++count;
            }

            System.out.println(lang + ":" + count);

View Full Code Here

Examples of com.cybozu.labs.langdetect.util.LangProfile

                continue;
            }

            FileOutputStream os = null;
            try {
                LangProfile profile = GenProfile.loadFromWikipediaAbstract(lang, file);
                profile.omitLessFreq();

                File profile_path = new File(get("directory") + "/profiles/" + lang);
                os = new FileOutputStream(profile_path);
                JSON.encode(profile, os);
            } catch (JSONException e) {
View Full Code Here

Examples of com.cybozu.labs.langdetect.util.LangProfile

            return;
        }

        FileOutputStream os = null;
        try {
            LangProfile profile = GenProfile.loadFromText(lang, file);
            profile.omitLessFreq();

            File profile_path = new File(lang);
            os = new FileOutputStream(profile_path);
            JSON.encode(profile, os);
        } catch (JSONException e) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.