Package joshua.util

Examples of joshua.util.CommandLineParser$UniversalSet


   *
   * @param args Command line arguments
   */
  public static void main(String[] args) {

    CommandLineParser commandLine = new CommandLineParser();
   
    Option<String> source_file = commandLine.addStringOption('s',"source-text","SOURCE_FILENAME","name of file containing source language corpus");
    //Option<String> source_file_encoding = commandLine.addStringOption("source-encoding","SOURCE_ENCODING","ISO-8859-1","source language file encoding");
    Option<String> source_file_encoding = commandLine.addStringOption("source-encoding","SOURCE_ENCODING","UTF-8","source language file encoding");
    Option<Boolean> source_file_gz = commandLine.addBooleanOption("source-text-gzipped",false,"is the source text gzipped");
   
    Option<String> target_file = commandLine.addStringOption('t',"target-text","TARGET_FILENAME","name of file containing target language corpus");
    //Option<String> target_file_encoding = commandLine.addStringOption("target-encoding","TARGET_ENCODING","ISO-8859-1","target language file encoding");
    Option<String> target_file_encoding = commandLine.addStringOption("target-encoding","TARGET_ENCODING","UTF-8","target language file encoding");
    Option<Boolean> target_file_gz = commandLine.addBooleanOption("target-text-gzipped",false,"is the target text gzipped");
   
    Option<String> alignment_file = commandLine.addStringOption('a',"alignment","ALIGNMENT_FILENAME","name of file containing word alignments for the sentences in the corpus");
    Option<Boolean> alignment_file_gz = commandLine.addBooleanOption("alignment-file-gzipped",false,"is the alignment file gzipped");

    Option<Integer> num_lines = commandLine.addIntegerOption('l',"lines","LINE_COUNT","number of aligned sentences in the corpus");
   
    Option<String> output_file = commandLine.addStringOption('o',"output","OUTPUT_FILENAME","file where aligned word pairs will be written");
    Option<String> output_file_encoding = commandLine.addStringOption("output-encoding","OUTPUT_ENCODING","UTF-8","output file encoding");
    Option<Boolean> output_file_gz = commandLine.addBooleanOption("output-text-gzipped",false,"should the output file be gzipped");
   
    commandLine.parse(args);
   
   
    try {
     
      // Set System.out and System.err to use the provided character encoding
      try {
        System.setOut(new PrintStream(System.out, true, commandLine.getValue(source_file_encoding)));
        System.setErr(new PrintStream(System.err, true, commandLine.getValue(source_file_encoding)));
      } catch (UnsupportedEncodingException e1) {
        System.err.println(commandLine.getValue(source_file_encoding) + " is not a valid encoding; using system default encoding for System.out and System.err.");
      } catch (SecurityException e2) {
        System.err.println("Security manager is configured to disallow changes to System.out or System.err; using system default encoding.");
      }
     
      // The number of lines to read
      int number_of_lines = commandLine.getValue(num_lines);

      // Set up the source text for reading
      Scanner source_text;
      if (commandLine.getValue(source_file).endsWith(".gz") || commandLine.getValue(source_file_gz)) {
        source_text = new Scanner(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(commandLine.getValue(source_file))),commandLine.getValue(source_file_encoding))));
      } else {
        source_text = new Scanner( new File(commandLine.getValue(source_file)), commandLine.getValue(source_file_encoding));
      }
     
      // Set up the target text for reading
      Scanner target_text;
      if (commandLine.getValue(target_file).endsWith(".gz") || commandLine.getValue(target_file_gz)) {
        target_text = new Scanner(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(commandLine.getValue(target_file))),commandLine.getValue(target_file_encoding))));
      } else {
        target_text = new Scanner( new File(commandLine.getValue(target_file)), commandLine.getValue(target_file_encoding));
      }
     
      // Set up the alignment file for reading
      Scanner alignments;
      if (commandLine.getValue(alignment_file).endsWith(".gz") || commandLine.getValue(alignment_file_gz)) {
        alignments = new Scanner(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(commandLine.getValue(alignment_file))))));
      } else {
        alignments = new Scanner( new File(commandLine.getValue(alignment_file)));
      }
     
     
      // Set up the output file for writing
      Writer outputFile;
      if (commandLine.getValue(output_file).endsWith(".gz") || commandLine.getValue(output_file_gz)) {
        outputFile = new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(commandLine.getValue(output_file))),commandLine.getValue(output_file_encoding));
      } else {
        outputFile = new OutputStreamWriter(new FileOutputStream(commandLine.getValue(output_file)),commandLine.getValue(output_file_encoding));
      }
     
      try {
        extract(number_of_lines, source_text, target_text, alignments, outputFile);
      } catch (NoSuchElementException e) {
        System.err.println("There are more than " + number_of_lines + " lines of input. Please determine the actual number of lines of input, and re-run with the appropriate command line flag.");
        commandLine.printUsage();
        System.exit(-1);
      }

    } catch (FileNotFoundException e) {
      e.printStackTrace();
View Full Code Here

TOP

Related Classes of joshua.util.CommandLineParser$UniversalSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.