Package edu.umd.hooka.corpora

Examples of edu.umd.hooka.corpora.Language


      BufferedReader r1_2 = null;
      if (readAlignments)
        r1_2=
          new BufferedReader(new InputStreamReader(new FileInputStream(afile1_2), "UTF8"));
      OutputStreamWriter w1 = new OutputStreamWriter(new FileOutputStream(ofile), oenc);
      Language de = Language.languageForISO639_1(lf);
      Language en = Language.languageForISO639_1(le);
      LanguagePair ende = null;
      if (readAlignments) ende = LanguagePair.languageForISO639_1Pair(le + "-" + lf);
      System.err.println("Reading " + en + " from: " + ifile1);
      System.err.println("Reading " + de + " from: " + ifile2);
      if (readAlignments)
View Full Code Here


      parseXMLDocument("/Users/redpony/bitexts/hansards.fr-en/hansards.fr-en.xml",
        new PChunkCallback() {
          Random r = new Random(1);
          BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("/tmp/bar.xml"), "UTF8"));
          public void handlePChunk(ParallelChunk p) {
            Language fr = Language.languageForISO639_1("fr");
            Language en = Language.languageForISO639_1("en");
            Chunk f = p.getChunk(fr);
            if (f == null) return;
            Chunk e = p.getChunk(en);
            if (e == null) return;
            float elen = e.getLength();
View Full Code Here

      System.err.println("          (note: lang must be a two-letter ISO639 code)");
      System.exit(1);
    }
    try {
      BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "UTF8"));
      Language fl = Language.languageForISO639_1(args[0]);
      LanguagePair lp = LanguagePair.languageForISO639_1Pair(args[0]+"-en");
      AlignmentWordPreprocessor sawp = AlignmentWordPreprocessor.CreatePreprocessor(lp, fl, null);
      String l;
      BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[2]), "UTF8"));
      while ((l =in.readLine()) != null) {
View Full Code Here

TOP

Related Classes of edu.umd.hooka.corpora.Language

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.