Package org.annolab.tt4j

Source Code of org.annolab.tt4j.Util

/*******************************************************************************
* Copyright (c) 2009-2014 Richard Eckart de Castilho.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
*     Richard Eckart de Castilho - initial API and implementation
******************************************************************************/
package org.annolab.tt4j;

import static java.io.File.separator;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.CharBuffer;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/**
* Utility functions.
*
* @author Richard Eckart de Castilho
*/
public final
class Util
{
  private
  Util()
  {
    // No instances
  }

  /**
   * Get the search paths for a model or executable. Using the {@code aSubPath}
   * argument, executables and models can be searched for in different
   * locations, e.g. executables in {@literal executable} and models in
   * {@literal models}.
   * <br/>
   * The returned list contains the additional search paths, the value of the
   * system property {@literal treetagger.home} and the
   * environment variables {@literal TREETAGGER_HOME} and {@literal TAGDIR}
   * in this order.
   *
   * @param aAdditionalPaths additional paths to search in.
   * @param aSubPath search in the given sub-directory of the search paths.
   * @return a list of search paths.
   */
  public static
  List<String> getSearchPaths(
      final List<String> aAdditionalPaths,
      final String aSubPath)
  {
    final List<String> paths = new ArrayList<String>();
    paths.addAll(aAdditionalPaths);
    if (System.getProperty("treetagger.home") != null) {
      paths.add(System.getProperty("treetagger.home")+separator+aSubPath);
    }
    if (System.getenv("TREETAGGER_HOME") != null) {
      paths.add(System.getenv("TREETAGGER_HOME")+separator+aSubPath);
    }
    if (System.getenv("TAGDIR") != null) {
      paths.add(System.getenv("TAGDIR")+separator+aSubPath);
    }
//    String path = System.getenv("PATH");
//    if (path != null) {
//      paths.addAll(asList(path.split(File.pathSeparator)));
//    }
    return paths;
  }

    /**
     * Join the given strings into a single string separated by the given
     * separator.
     *
     * @param aStrings strings to join.
     * @param aSeparator a separator.
     * @return the joined string.
     */
    public static
  String join(
      final String[] aStrings,
      final String aSeparator)
  {
    final StringBuilder sb = new StringBuilder();

    for (int i = 0; i < aStrings.length; i++) {
      sb.append(aStrings[i]);
      if (i < aStrings.length - 1) {
        sb.append(aSeparator);
      }
    }

    return sb.toString();
  }

    /**
     * Join the given strings into a single string separated by the given
     * separator.
     *
     * @param aStrings strings to join.
     * @param aSeparator a separator.
     * @return the joined string.
     */
    public static
  String join(
      final Collection<String> aStrings,
      final String aSeparator)
  {
    final StringBuilder sb = new StringBuilder();

    Iterator<String> i = aStrings.iterator();
    while (i.hasNext()) {
      sb.append(i.next());
      if (i.hasNext()) {
        sb.append(aSeparator);
      }
    }

    return sb.toString();
  }

    /**
     * Checks if a token returned by TreeTagger corresponds to the token sent to TreeTagger. This
     * method does not check for strict equality, because if TreeTagger does not know a character,
     * it will return "?" instead. So this method interprets an "?" returned by TreeTagger as a
     * single-character wildcard.
     *
     * @param tokenSent token sent to TreeTagger
     * @param tokenReturned token returned from TreeTagger
     * @return if the token returned matches the token sent.
     */
    public static
    boolean matches(
        final String tokenSent,
        final String tokenReturned)
    {
      if (tokenSent == null && tokenReturned == null) {
        return true;
      }
      if (tokenSent == null || tokenReturned == null) {
        return false;
      }
      if (tokenSent.length() != tokenReturned.length()) {
        return false;
      }
      for (int i = 0; i < tokenSent.length(); i ++) {
        if (tokenReturned.charAt(i) == '?') {
          continue;
        }
        if (tokenSent.charAt(i) == tokenReturned.charAt(i)) {
          continue;
        }
        return false;
      }
      return true;
    }

    /**
     * Close the given {@link Closeable}.
     *
     * @param aClosable a closable object.
     */
    public static
    void close(
        final Closeable aClosable)
    {
      if (aClosable != null) {
        try {
          aClosable.close();
        }
        catch (final IOException e) {
          // Ignore
        }
      }
    }
   
    /**
     * For tests only.
     */
    protected static
    String readFile(
        final File aFile,
        final String aEncoding)
    throws IOException
    {
      Reader reader = null;
      try {
          StringBuilder sb = new StringBuilder();
          CharBuffer buffer = CharBuffer.allocate(65535);
        reader = new InputStreamReader(new FileInputStream(aFile), aEncoding);
        while (reader.ready()) {
          reader.read(buffer);
          buffer.flip();
          sb.append(buffer.toString());
        }
        return sb.toString();
      }
      finally {
        close(reader);
      }
    }

    /**
     * For tests only.
     */
    protected static
    void writeFile(
        final String aText,
        final File aFile,
        final String aEncoding)
    throws IOException
    {
      Writer writer = null;
      try {
        aFile.getParentFile().mkdirs();
          writer = new OutputStreamWriter(new FileOutputStream(aFile), aEncoding);
          writer.write(aText);
      }
      finally {
        close(writer);
      }
    }

    /**
     * For tests only.
     */
    protected static
    String[] tokenize(
        final String aText,
        final Locale aLocale)
    {
      List<String> tokens = new ArrayList<String>();
      BreakIterator bi = BreakIterator.getWordInstance(aLocale);
      bi.setText(aText);
      int begin = 0;
      while (bi.next() != BreakIterator.DONE) {
        tokens.add(aText.substring(begin, bi.current()));
        begin = bi.current();
      }
      return tokens.toArray(new String[tokens.size()]);
    }
}
TOP

Related Classes of org.annolab.tt4j.Util

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.