Package org.htmlparser.util

Examples of org.htmlparser.util.NodeIterator


 
        // Gather every node in the parsed document that is accepted by categoryFilter.
        NodeList categoryNodes = parser.extractAllNodesThatMatch(categoryFilter);

                // Local accumulator; handed to parseCategory for each matched node.
                List<Category> categories = new ArrayList<Category>();
        if (categoryNodes.size() > 0) {
          // Walk the matches with the library's NodeIterator (hasMoreNodes/nextNode).
          NodeIterator iter = categoryNodes.elements();
          while (iter.hasMoreNodes()) {
            Node node = iter.nextNode();
            parseCategory(node, categories);
          }
          // The field is only overwritten when at least one node matched;
          // with zero matches this.categories keeps its previous value.
          this.categories = categories;
        }
       
View Full Code Here


      if (links.size() == 0) {
        return;
      }
     
        Category parent = null;
      NodeIterator iter = links.elements();
        while (iter.hasMoreNodes()) {
          LinkTag link = (LinkTag)iter.nextNode();
          Category category = new Category();
          category.setTitle(NodeUtils.getTextData(link));
          category.setId(link.getLink().replaceAll(".*/movieGenres/(.*?)", "$1"));
           
            if (category.getId().indexOf('/') == -1) {
View Full Code Here

            if (selectedPage != null && selectedPage.equals(Integer.toString(page))) {
           
                NodeList browseNodes = infoCollector.getNodeList(1);
               
                if (browseNodes.size() > 1) {
                    NodeIterator iter = browseNodes.elements();
                   
                    while (iter.hasMoreNodes()) {
                        MovieItem item = parseMovieSearch(iter.nextNode());
                        if (item != null) {
                            items.add(item);
                        }
                    }
                }
View Full Code Here

        URL url;
        HttpURLConnection connection;
        StringBuffer buffer;
        PrintWriter out;
        boolean pass;
        NodeIterator enumeration;
        Node node;
        Text string;

        try
        {
            url = new URL ("http://www.canadapost.ca/tools/pcl/bin/cp_search_response-e.asp");
             connection = (HttpURLConnection)url.openConnection ();
            connection.setRequestMethod ("POST");
            connection.setRequestProperty ("Referer", "http://www.canadapost.ca/tools/pcl/bin/default-e.asp");
            connection.setDoOutput (true);
            connection.setDoInput (true);
            connection.setUseCaches (false);
            buffer = new StringBuffer (1024);
            buffer.append ("app_language=");
            buffer.append ("english");
            buffer.append ("&");
            buffer.append ("app_response_start_row_number=");
            buffer.append ("1");
            buffer.append ("&");
            buffer.append ("app_response_rows_max=");
            buffer.append ("9");
            buffer.append ("&");
            buffer.append ("app_source=");
            buffer.append ("quick");
            buffer.append ("&");
            buffer.append ("query_source=");
            buffer.append ("q");
            buffer.append ("&");
            buffer.append ("name=");
            buffer.append ("&");
            buffer.append ("postal_code=");
            buffer.append ("&");
            buffer.append ("directory_area_name=");
            buffer.append ("&");
            buffer.append ("delivery_mode=");
            buffer.append ("&");
            buffer.append ("Suffix=");
            buffer.append ("&");
            buffer.append ("street_direction=");
            buffer.append ("&");
            buffer.append ("installation_type=");
            buffer.append ("&");
            buffer.append ("delivery_number=");
            buffer.append ("&");
            buffer.append ("installation_name=");
            buffer.append ("&");
            buffer.append ("unit_numbere=");
            buffer.append ("&");
            buffer.append ("app_state=");
            buffer.append ("production");
            buffer.append ("&");
            buffer.append ("street_number=");
            buffer.append (number);
            buffer.append ("&");
            buffer.append ("street_name=");
            buffer.append (street);
            buffer.append ("&");
            buffer.append ("street_type=");
            buffer.append (type);
            buffer.append ("&");
            buffer.append ("test=");
            buffer.append ("&");
            buffer.append ("city=");
            buffer.append (city);
            buffer.append ("&");
            buffer.append ("prov=");
            buffer.append (province);
            buffer.append ("&");
            buffer.append ("Search=");
            out = new PrintWriter (connection.getOutputStream ());
            out.print (buffer);
            out.close ();
            parser = new Parser (connection);
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
        }
        catch (Exception e)
        {
            throw new ParserException ("You must be offline! This test needs you to be connected to the internet.", e);
        }

        pass = false;
        for (enumeration = parser.elements (); enumeration.hasMoreNodes ();)
        {
            node = enumeration.nextNode ();
            if (node instanceof Text)
            {
                string = (Text)node;
                if (-1 != string.getText ().indexOf (postal_code))
                    pass = true;
View Full Code Here

        File file;
        PrintWriter out;
        Parser parser;
        Node nodes[];
        int i;
        NodeIterator enumeration;

        path = System.getProperty ("user.dir");
        if (!path.endsWith (File.separator))
            path += File.separator;
        file = new File (path + "delete_me.html");
        try
        {
            out = new PrintWriter (new FileWriter (file));
            out.println ("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">");
            out.println ("<html>");
            out.println ("<head>");
            out.println ("<title>test</title>");
            out.println ("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">");
            out.println ("</head>");
            out.println ("<body>");
            out.println ("This is a test page ");
            out.println ("</body>");
            out.println ("</html>");
            out.close ();
            parser = new Parser (file.getAbsolutePath (), new DefaultParserFeedback(DefaultParserFeedback.QUIET));
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
            nodes = new Node[30];
            i = 0;
            for (enumeration = parser.elements (); enumeration.hasMoreNodes ();)
            {
                nodes[i] = enumeration.nextNode ();
                i++;
            }
            assertEquals("Expected nodes",20,i);
        }
        catch (Exception e)
View Full Code Here

    {
        String path;
        File file;
        PrintWriter out;
        Parser parser;
        NodeIterator enumeration;

        path = System.getProperty ("user.dir");
        if (!path.endsWith (File.separator))
            path += File.separator;
        file = new File (path + "delete_me.html");
        try
        {
            out = new PrintWriter (new FileWriter (file));
            out.println ("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">");
            out.println ("<html>");
            out.println ("<head>");
            out.println ("<title>test</title>");
            out.println ("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">");
            out.println ("</head>");
            out.println ("<body>");
            out.println ("This is a test page ");
            out.println ("</body>");
            out.println ("</html>");
            // fill our 16K buffer on read
            for (int i = 0; i < InputStreamSource.BUFFER_SIZE; i++)
                out.println ();
            out.close ();
            parser = new Parser (file.getAbsolutePath (), new DefaultParserFeedback(DefaultParserFeedback.QUIET));
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
            enumeration = parser.elements ();
            enumeration.nextNode ();
            if (-1 != System.getProperty ("os.name").indexOf("Windows"))
                // linux/unix lets you delete a file even when it's open
                assertTrue ("file deleted with more available", !file.delete ());
            // parser.getLexer ().getPage ().close ();
            parser = null;
View Full Code Here

     * the enumeration is created, that the charset has changed to the correct value.
     */
    public void testHTMLCharset ()
    {
        Parser parser;
        NodeIterator enumeration;

        try
        {
            parser = new Parser("http://www.sony.co.jp", Parser.DEVNULL);
            assertEquals("Character set by default is ISO-8859-1", "ISO-8859-1", parser.getEncoding ());
            enumeration = parser.elements();
            // search for the <BODY> tag
            while (enumeration.hasMoreNodes ())
                if (enumeration.nextNode () instanceof BodyTag)
                    break;
            assertTrue("Character set should be Shift_JIS", parser.getEncoding ().equalsIgnoreCase ("Shift_JIS"));
        }
        catch (ParserException e)
        {
View Full Code Here

        actual   = removeEscapeCharacters(actual);

        Parser expectedParser = Parser.createParser(expected, null);
        Parser resultParser   = Parser.createParser(actual, null);

        NodeIterator expectedIterator = expectedParser.elements();
        NodeIterator actualIterator =  resultParser.elements();
        displayMessage = createGenericFailureMessage(displayMessage, expected, actual);

        nextExpectedNode = null;
        nextActualNode = null;
        tag1 = null;
View Full Code Here

     * or <code>null</code> to retrieve all the top level nodes.
     * @throws ParserException If a parsing error occurs.
     */
    public NodeList parse (NodeFilter filter) throws ParserException
    {
        NodeIterator e;
        Node node;
        NodeList ret;

        ret = new NodeList ();
        for (e = elements (); e.hasMoreNodes (); )
        {
            node = e.nextNode ();
            if (null != filter)
                node.collectInto (ret, filter);
            else
                ret.add (node);
        }
View Full Code Here

     */
    public NodeList extractAllNodesThatMatch (NodeFilter filter)
        throws
            ParserException
    {
        NodeIterator e;
        NodeList ret;

        ret = new NodeList ();
        for (e = elements (); e.hasMoreNodes (); )
            e.nextNode ().collectInto (ret, filter);

        return (ret);
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.util.NodeIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.