Package appl.Portal.Utils.LinkSearch

Source Code of appl.Portal.Utils.LinkSearch.Item

/*
*  This software and supporting documentation were developed by
*
*    Siemens Corporate Technology
*    Competence Center Knowledge Management and Business Transformation
*    D-81730 Munich, Germany
*
*    Authors (representing a really great team ;-) )
*            Stefan B. Augustin, Thorbj�rn Hansen, Manfred Langen
*
*  This software is Open Source under GNU General Public License (GPL).
*  Read the text of this license in LICENSE.TXT
*  or look at www.opensource.org/licenses/
*
*  Once more we emphasize, that:
*  THIS SOFTWARE IS MADE AVAILABLE,  AS IS,  WITHOUT ANY WARRANTY
*  REGARDING  THE  SOFTWARE,  ITS  PERFORMANCE OR
*  FITNESS FOR ANY PARTICULAR USE, FREEDOM FROM ANY COMPUTER DISEASES OR
*  ITS CONFORMITY TO ANY SPECIFICATION. THE ENTIRE RISK AS TO QUALITY AND
*  PERFORMANCE OF THE SOFTWARE IS WITH THE USER.
*
*/


// ************ package ****************************************************
package appl.Portal.Utils.LinkSearch;

// ************ imports ******************************************************

// KFM
import KFM.Exceptions.ProgrammerException;
import KFM.File.FileUtils;
import KFM.Converter;
import KFM.log.*;

// OROMatcher package
import com.oroinc.text.regex.*;

// java package
import java.util.Hashtable;

public class Item
{

    /** Stringarray that gives the references for the mItem Hashtable. */
    private String[] mNames;

    // string with the item as content from where we will get the attributes via matcher()
    private String mContent;

    /* This class helps to get rid of html tags that came from the searchengine. */
    private Converter mConverter;

    /** A regular expression wich has attributes of an item as a subgroup.
    *
    * For more information about regular expression  and about how they work see:
    * $/KFM/www-docs/protected/developer/appl/Portal/MetaSearch/SearchEngineWrapper.html
    *
    * For more information about the implememtation of regular expression ba OROMatcher(TM) see:
    * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
    *
    * MatchResult passed on to: group( < 0 )are passed on to a Hashtable wich is referenced
    * by the strings given through mNames[].
    */
    protected String mRegExpItem;

    /** Hashtable that holds the attributes of a ResultItem.
    *
    *  The references for the Hashtable are given by mNames[].
    */
    private Hashtable mItems = new Hashtable();

    /** A MatchResult, see package com.oroinc.text.regex.*;
    *
    * For more information about the implememtation of regular expression ba OROMatcher(TM) see:
    * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
    */
    private MatchResult mMatcher;

    /** Constructor that gets all the information
    *   he needs to look for ResultItems.
    */
    public Item( String aRegExpItem, String aContent, String[] aNames )
    {
        mNames = aNames;
        mContent = aContent;
        mRegExpItem = aRegExpItem;
         KFMSystem.log.debug(aRegExpItem);
         KFMSystem.log.debug(mContent);
         KFMSystem.log.debug( "Matcher of Item was reached!");

        mMatcher = this.match( mRegExpItem, mContent );
        if (mMatcher != null){
        } else {
            KFMSystem.log.debug("NO ITEM MATCH");
            return;
        }
        // with ( (j -1) < mNames.length )) i just wanted to prevent some null
        // pointer exceptions
        // KFMSystem.log.debug( mMatcher.groups() );

        for( int j = 1; (( j < mMatcher.groups()) && ( (j -1) < mNames.length )); ++j )
        {
            String s = mConverter.removeHtmlTags( mMatcher.group(j));
                   s = mConverter.replaceString("&nbsp;", "", s );
            mItems.put( aNames[j - 1], s );
        }
    }

    /**
    * Method wich matches a patternstring with a contentstring.
    * Here the regular expression metacharacter '.' matches
    * everythin, even new lines ('\n'). See listing below:
    *
    *
    * For more information about the implememtation of regular expression ba OROMatcher(TM) see:
    * $/KFM/www-docs/protected/developer/external-docu/OROMatcher-1.0.7/doc/index.html
    *
    * @return MatchResult
    */
    public MatchResult match( String patternString, String text )
    {
        int groups;
        // PatternMatcher Instance
        PatternMatcher matcher = new Perl5Matcher();
        // a compiler
        PatternCompiler compiler = new Perl5Compiler();
        Pattern pattern = null;
        // the patternmatcher Input
        PatternMatcherInput input;
        MatchResult result;

        // Wenn you set the Perl5Compiler.SINGLELINE_MASK option
        // the contentstring is treated singleline, even if there
        // where some '\n' in it.
        try  {
            pattern = compiler.compile(patternString, Perl5Compiler.SINGLELINE_MASK );
        } catch(MalformedPatternException e) {
            System.err.println("LinkSearch.Item.match: Bad pattern: `" + e.getMessage() + "�.");
            //@@@ System.exit(1);
            //@@@ Make this cleaner some day.
            throw new ProgrammerException("LinkSearch.match: Bad pattern: `" + e.getMessage() + "�.");
        }

        input = new PatternMatcherInput(text);

        // When true, a match has been found.
        if(matcher.contains(input, pattern))  {
            result = matcher.getMatch();
        } else {
            result =  null;
        }
        return result;
    }

    public Hashtable getItem()
    {
        return mItems;
    }

    /**
    * Method to test if a Pattern compiles
    *
    * @return boolean
    */

    public static boolean compile( String aPatternString )
    {
        try
        {
            // wenn you set the Perl5Compiler.SINGLELINE_MASK option
            // the contentstring is treated singleline, even if there
            // where some '\n' in it.
            PatternCompiler compiler = new Perl5Compiler();
            Pattern pattern = compiler.compile( aPatternString, Perl5Compiler.SINGLELINE_MASK |
                                                                    Perl5Compiler.CASE_INSENSITIVE_MASK);
            return true;
        }
        catch( MalformedPatternException e ){ System.err.println (e); return false;}

    }
    /**
    * the ideal main routine for debugging perposes
    * just put same text to be matched by a pattern into
    * a file, type a pattern string and debug
    */
    public static void main( String args[] )
    {
        try {
            String tFileContent = FileUtils.readWholeTextFile("C:\\users\\reindl\\Files\\yahooquery.txt");
            String tPatternString =
                "<li>(.*?)</i><p>";

                Item tItem = new Item(tPatternString, tFileContent, new String[2]);
            if( tFileContent != null )
            {
                if (tItem.compile( tPatternString )) {
                    MatchResult tMR= tItem.match( tPatternString, tFileContent);
                    for (int i = 0; i < tMR.groups(); ++i )
                        System.out.println("group(" + i + ") :" + tMR.group(i));
                }
                else {System.out.println("Did not compile.");}
            }
        }
        catch( java.io.IOException e){System.err.println( e );}
    }
}
TOP

Related Classes of appl.Portal.Utils.LinkSearch.Item

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.