Package appl.Portal.Utils.Translator

Source Code of appl.Portal.Utils.Translator.Translator

/*
*  This software and supporting documentation were developed by
*
*    Siemens Corporate Technology
*    Competence Center Knowledge Management and Business Transformation
*    D-81730 Munich, Germany
*
*    Authors (representing a really great team ;-) )
*            Stefan B. Augustin, Thorbjørn Hansen, Manfred Langen
*
*  This software is Open Source under GNU General Public License (GPL).
*  Read the text of this license in LICENSE.TXT
*  or look at www.opensource.org/licenses/
*
*  Once more we emphasize, that:
*  THIS SOFTWARE IS MADE AVAILABLE,  AS IS,  WITHOUT ANY WARRANTY
*  REGARDING  THE  SOFTWARE,  ITS  PERFORMANCE OR
*  FITNESS FOR ANY PARTICULAR USE, FREEDOM FROM ANY COMPUTER DISEASES OR
*  ITS CONFORMITY TO ANY SPECIFICATION. THE ENTIRE RISK AS TO QUALITY AND
*  PERFORMANCE OF THE SOFTWARE IS WITH THE USER.
*
*/


// ************ package ******************************************************
package appl.Portal.Utils.Translator;

// ************ import ******************************************************

// Linksearch packages
import appl.Portal.Utils.LinkSearch.*;

// OROMatcher packages
import com.oroinc.text.regex.*;

// java packages
import java.io.Writer;
import java.io.StringWriter;
import java.io.IOException;
import java.util.Properties;
import java.util.Vector;
import java.util.Date;
import java.util.Hashtable;
import java.net.URL;
import java.net.MalformedURLException;

// KFM classes
import KFM.Converter;
import KFM.log.*;

// uses HTMLLoader
import KFM.HTML.HtmlLoader2;


public class Translator implements GenericResultSet
{

    private HtmlLoader2 mHtmlLoader;

    /** String that contains a regular expression.
    *
    * This regular expression parses the static wraparound of the result html page
    * of Leo ( i. e. banners, images etc. ).
    *
    * For more information about regular expression  and about how they work see:
    * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
    *
    * For more information about the implememtation of regular expression by OROMatcher(TM) see:
    * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
    *
    * To be matched: String that contains a result html page
    * MatchResult passed on to: mRegExpWordpair.
    *
    */
    private static String mRegExpFrame = "<!-- # Spalten 4-6, Ergebnisse # -->(.*)<!-- # Spalte 7, Abstandshalter # -->";

    /** String that contains a regular expression.
    *
    * This regular expression parses one wordpair ( e. g. an english word and the german translation
    * for it ) of the result html page.
    *
    * For more information about regular expression  and about how they work see:
    * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
    *
    * For more information about the implememtation of regular expression by OROMatcher(TM) see:
    * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
    *
    * To be matched: group(0) of mRegExpFrame
    * MatchResult passed on to: mRegExpWord
    */
    private static String mRegExpWordpair = "<TR\\s*VALIGN=\"top\"(?:.*?)43%\">(.*?)</TD>(?:.*?)43%\">(.*?)</TD>";

    /** String that contains a regular expression.
    *
    * This regular expression parses two words ( e. g. an english word and the german translation
    * for it ) of the result html page.
    *
    * For more information about regular expression and about how they work see:
    * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
    *
    * For more information about the implememtation of regular expression by OROMatcher(TM) see:
    * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
    *
    * To be matched: group(0) of mRegExpWordpair
    * MatchResult passed on to: groups( > 0 ) are passed on to Item.java, there stored in a
    * Hashtable that is referenced by strings given through mNames[].
    *
    */
    private static String mRegExpWord = "<TR\\s*VALIGN=\"top\"(?:.*?)43%\">(.*?)</TD>(?:.*?)43%\">(.*?)</TD>";

    /** This stringarray contains the information how the wordpair hashtable shall be referenced. */
    private static String mNames[] = {"en", "de" };

    /** Empty String because the value of display language is optional. */
    private String mDisplayLanguage = "";

    /** The webaddress of leo.*/
    private static final String theUrltemplate = "http://dict.leo.org/";

    /** Http parameter, wich tells leos webapplication to start the translation. */
    private static final String theEventString = "?relink=off&search=";

    /** Http parameter, wich tells leos webapplication in wich language the result should be displayed.
    *
    * This parameter is optional.
    */
    private static final String theDisplayLanguageString = "&lang=";

    /** This class parses HTML Files.
    *
    * This class parses HTML Files with the information given by the class members
    * mRegExpFrame, mRegExpWordpair, mRegExpWord, mNames;
    */
    private HtmlParser mHtmlParser;

    /** Keeps track of the current index when iterating through the resultset. */
    int mCounter = 0;

    /** Log File */
    private KFMLog mLog;

    /** Constructor */
    public Translator ()
    {
        mLog=KFMSystem.log;
    }

    public Translator (
        KFMLog aLog)
    {
        mLog=aLog;
    }

    /** Translates a word into the specified language by sending it to leo, retrieving and parsing the result.
    * The class HtmlParser.java is used, where the results are stored.
    * The results can be retrieved with getWordpair();
    *
    * @param aWord the word to be translated
    *
    */
    public boolean translate( String aWord )
    {
            String tUrlString = this.buildUrl(aWord, this.getDisplayLanguage());


            // load URL
            mHtmlLoader = new HtmlLoader2();
            boolean tResult=false;

            try {
                tResult = mHtmlLoader.load(tUrlString, 0);
            } catch (MalformedURLException me){
            }
            catch (java.io.InterruptedIOException me){
            }

            if (!tResult){
                mLog.info("Cannot load Translator URL");
                return false;
            } else {
                String tContent = mHtmlLoader.getContent();

                mHtmlParser = new HtmlParser();
                // give mHtmlParser the information he needs
                mHtmlParser.setRegExpFrame(this.mRegExpFrame);
                mHtmlParser.setRegExpItemSet(this.mRegExpWordpair);
                mHtmlParser.setRegExpItem(this.mRegExpWord);
                mHtmlParser.setNames(mNames);
                // parse doucument and store wordpairs in a Hashtable
                // they can be retrieved with getWordpairs()
                mHtmlParser.parse(tContent);
            }
            return true;
    }

    /** Returns the wordpairs retrieved from leo as a Hashtable.
    *
    * See docu of 'mWordpairs' for an explanation of its function.
    *
    * @return Hashtable with wordpairs.
    */
    public Hashtable nextItem()
    {
        Hashtable tHash = mHtmlParser.getItem(mCounter);
        mCounter++;
        return tHash;
    }

    public void setDisplayLanguage( String aDisplayLanguage)
    {
        mDisplayLanguage = aDisplayLanguage;
    }

    public String getDisplayLanguage()
    {
        return mDisplayLanguage;
    }

    /** Tests if more wordpairs are available. */
    public boolean hasMoreItems()
    {
        if (mHtmlParser == null){
            return false;
        }
        return mCounter < mHtmlParser.getNumberOfItems();
    }

    /** Builds the url with the specific http parameters.
    *
    * See docu of 'theLanguageString'.
    */
    public String buildUrl( String aWord, String aLanguage )
    {
        // the word to be translated may contain spaces. therefor it has to be encoded

        return (theUrltemplate + theEventString + java.net.URLEncoder.encode( aWord ) +
                        theDisplayLanguageString + this.getDisplayLanguage());
    }

    /** Gets the leo web url
    *
    */
    public String getUrl()
    {
        return theUrltemplate;
    }



    public static void main( String[] args )
    {
        Translator tTranslator = new Translator();
        // set word to be tranlated and displaylanguage
        tTranslator.translate( "Framework" );
        tTranslator.setDisplayLanguage("de");
        while( tTranslator.hasMoreItems() )
        {
            Hashtable tIdiom = tTranslator.nextItem();
            if( tTranslator.getDisplayLanguage().equals("de") )
            {
                System.out.println( "Englisch: " + (String)tIdiom.get("en") );
                System.out.println( "Deutsch: " +( String ) tIdiom.get("de"));
            }
            else
            {
                System.out.println( "English: " + (String)tIdiom.get("en") );
                System.out.println( "German: " +( String ) tIdiom.get("de"));
            }
        }
        System.out.println( "Finished!" );
    }
}
TOP

Related Classes of appl.Portal.Utils.Translator.Translator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.