Package ivory.core.util

Source Code of ivory.core.util.CLIRUtilsTest

package ivory.core.util;

import java.io.File;
import java.io.IOException;

import junit.framework.Assert;
import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

import edu.umd.hooka.Vocab;
import edu.umd.hooka.alignment.HadoopAlign;
import edu.umd.hooka.ttables.TTable_monolithic_IFAs;

public class CLIRUtilsTest extends TestCase {
  @Test
  public void testGIZA(){
    String srcVocabFile = "etc/toy_vocab.de-en.de";
    String trgVocabFile = "etc/toy_vocab.de-en.en";
    String ttableFile = "etc/toy_ttable.de-en";

    Configuration conf =  new Configuration();
    try {
      CLIRUtils.createTTableFromGIZA(
          "etc/toy_lex.0-0.f2n",
          srcVocabFile,
          trgVocabFile,
          ttableFile,
          0.9f, 15, FileSystem.getLocal(conf));
    } catch (IOException e) {
      e.printStackTrace();
    }

    try {
      Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf));
      Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf));
      TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true);

      assertTrue(ttable.getMaxE()==srcVocab.size()-1);

      int src = srcVocab.get("buch");
      int trg = trgVocab.get("book");
      assertTrue("Giza_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);

      src = srcVocab.get("krankenhaus");
      trg = trgVocab.get("hospit");
      assertTrue("Giza_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }
  }

  @Test
  public void testGIZA2(){
    String srcVocabFile = "etc/vocab.en-de.en";
    String trgVocabFile = "etc/vocab.en-de.de";
    String ttableFile = "etc/ttable.en-de";

    Configuration conf =  new Configuration();
    try {
      CLIRUtils.createTTableFromGIZA(
          "etc/toy_lex.0-0.n2f",
          srcVocabFile,
          trgVocabFile,
          ttableFile,
          0.9f, 15, FileSystem.getLocal(conf));
    } catch (IOException e) {
      e.printStackTrace();
    }

    try {
      Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf));
      Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf));
      TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true);

      assertTrue(ttable.getMaxE()==srcVocab.size()-1);

      int src = srcVocab.get("book");
      int trg = trgVocab.get("buch");
      assertTrue("Giza_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);

      src = srcVocab.get("hospit");
      trg = trgVocab.get("krankenhaus");
      assertTrue("Giza_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }

    new File(srcVocabFile).delete();
    new File(trgVocabFile).delete();
    new File(ttableFile).delete();
  }

  @Test
  public void testBerkeleyAligner(){
    String srcVocabFile = "etc/vocab.de-en.de";
    String trgVocabFile = "etc/vocab.de-en.en";
    String ttableFile = "etc/ttable.de-en";

    Configuration conf =  new Configuration();
    try {
      CLIRUtils.createTTableFromBerkeleyAligner(
          "etc/toy_stage2.2.params.txt",
          srcVocabFile,
          trgVocabFile,
          ttableFile,
          0.9f, 15, FileSystem.getLocal(conf));
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }

    try {
      Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf));
      Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf));
      TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true);

      assertTrue(ttable.getMaxE()==srcVocab.size()-1);

      int src = srcVocab.get("buch");
      int trg = trgVocab.get("book");
      assertTrue("Berk_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);

      src = srcVocab.get("krankenhaus");
      trg = trgVocab.get("hospit");
      assertTrue("Berk_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }

    new File(srcVocabFile).delete();
    new File(trgVocabFile).delete();
    new File(ttableFile).delete();
  }

  @Test
  public void testBerkeleyAligner2(){
    String srcVocabFile = "etc/vocab.en-de.en";
    String trgVocabFile = "etc/vocab.en-de.de";
    String ttableFile = "etc/ttable.en-de";

    Configuration conf =  new Configuration();
    try {
      CLIRUtils.createTTableFromBerkeleyAligner(
          "etc/toy_stage2.1.params.txt",
          srcVocabFile,
          trgVocabFile,
          ttableFile,
          0.9f, 15, FileSystem.getLocal(conf));
    } catch (IOException e) {
      e.printStackTrace();
    }

    try {
      Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf));
      Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf));
      TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true);

      assertTrue(ttable.getMaxE()==srcVocab.size()-1);

      int src = srcVocab.get("book");
      int trg = trgVocab.get("buch");
      assertTrue("Berk_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);

      src = srcVocab.get("hospit");
      trg = trgVocab.get("krankenhaus");
      assertTrue("Berk_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }

    new File(srcVocabFile).delete();
    new File(trgVocabFile).delete();
    new File(ttableFile).delete();
  }

  @Test
  public void testHooka(){
    String finalSrcVocabFile = "etc/toy_vocab.de-en.de";
    String finalTrgVocabFile = "etc/toy_vocab.de-en.en";
    String finalTTableFile = "etc/toy_ttable.de-en";

    Configuration conf =  new Configuration();
    try {
      CLIRUtils.createTTableFromHooka(
          "etc/toy_vocab.de-en.de.raw",
          "etc/toy_vocab.de-en.en.raw",
          "etc/toy_ttable.de-en.raw",
          finalSrcVocabFile,
          finalTrgVocabFile,
          finalTTableFile,
          0.9f, 15, FileSystem.getLocal(conf));
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }

    try {
      Vocab srcVocab = HadoopAlign.loadVocab(new Path(finalSrcVocabFile), FileSystem.getLocal(conf));
      Vocab trgVocab = HadoopAlign.loadVocab(new Path(finalTrgVocabFile), FileSystem.getLocal(conf));
      TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(finalTTableFile), true);

      assertTrue(ttable.getMaxE()==srcVocab.size()-1);

      int src = srcVocab.get("buch");
      int trg = trgVocab.get("book");
      assertTrue("Hooka_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);

      src = srcVocab.get("krankenhaus");
      trg = trgVocab.get("hospit");
      assertTrue("Hooka_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }
  }


  @Test
  public void testHooka2(){
    String finalSrcVocabFile = "etc/toy_vocab.en-de.en";
    String finalTrgVocabFile = "etc/toy_vocab.en-de.de";
    String finalTTableFile = "etc/toy_ttable.en-de";

    Configuration conf =  new Configuration();
    try {
      CLIRUtils.createTTableFromHooka(
          "etc/toy_vocab.en-de.en.raw",
          "etc/toy_vocab.en-de.de.raw",
          "etc/toy_ttable.en-de.raw",
          finalSrcVocabFile,
          finalTrgVocabFile,
          finalTTableFile,
          0.9f, 15, FileSystem.getLocal(conf));
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }

    try {
      Vocab srcVocab = HadoopAlign.loadVocab(new Path(finalSrcVocabFile), FileSystem.getLocal(conf));
      Vocab trgVocab = HadoopAlign.loadVocab(new Path(finalTrgVocabFile), FileSystem.getLocal(conf));
      TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(finalTTableFile), true);

      assertTrue(ttable.getMaxE()==srcVocab.size()-1);

      int src = srcVocab.get("book");
      int trg = trgVocab.get("buch");
      assertTrue("Hooka_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);

      src = srcVocab.get("hospit");
      trg = trgVocab.get("krankenhaus");
      assertTrue("Hooka_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3);
    } catch (IOException e) {
      e.printStackTrace();
      Assert.fail();
    }
  }

}
TOP

Related Classes of ivory.core.util.CLIRUtilsTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.