Package org.htmlparser

Examples of org.htmlparser.PrototypicalNodeFactory


                + "<body>\n"
                + "<!-- foobar --!>\n"
                + "</body>\n"
                + "</html>\n"
                );
            parser.setNodeFactory (new PrototypicalNodeFactory (true));
            parseAndAssertNodeCount (18);
            assertTrue("Node should be a Remark but was " + node[12], node[12] instanceof Remark);
            assertStringEquals ("remark text", "<!-- foobar --!>", node[12].toHtml ());
        }
        finally
View Full Code Here


    {
        String preamble = "<div class=\"parser\">";
        String remark = "<!-- ->ERROR<!- -->";
        String rest = "</div></div> <!-- two dashes is what delimits a comment, so the text \"->ERROR<!-\" earlier on this line is actually part of a comment -->";
        createParser (preamble + remark + rest);
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        parseAndAssertNodeCount (6);
        assertTrue("Node should be a Remark but was " + node[1], node[1] instanceof Remark);
        assertStringEquals ("remark text", remark, node[1].toHtml ());
    }
View Full Code Here

    {
        String preamble = "<div class=\"parser\">";
        String remark = "<!-- --->ERROR<!- -->";
        String rest = "</div></div> <!-- two dashes is what delimits a comment, so the text \"->ERROR<!-\" earlier on this line is actually part of a comment -->";
        createParser (preamble + remark + rest);
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        parseAndAssertNodeCount (6);
        assertTrue("Node should be a Remark but was " + node[1], node[1] instanceof Remark);
        assertStringEquals ("remark text", remark, node[1].toHtml ());
    }
View Full Code Here

        return imgTagCount;
    }

    public int countImageTagsWithHTMLParser() throws ParserException {
        Parser parser = new Parser("http://education.yahoo.com/",new DefaultParserFeedback());
        parser.setNodeFactory (new PrototypicalNodeFactory (new ImageTag ()));
        setParser (parser);
        int parserImgTagCount = 0;
        Node node;
        for (NodeIterator e= parser.elements();e.hasMoreNodes();) {
            node = e.nextNode();
View Full Code Here

    /**
     * Create a web site capturer.
     */
    public SiteCapturer ()
    {
        PrototypicalNodeFactory factory;

        mSource = null;
        mTarget = null;
        mPages = new ArrayList ();
        mFinished = new HashSet ();
        mImages = new ArrayList ();
        mCopied = new HashSet ();
        mParser = new Parser ();
        factory = new PrototypicalNodeFactory ();
        factory.registerTag (new LocalLinkTag ());
        factory.registerTag (new LocalFrameTag ());
        factory.registerTag (new LocalBaseHrefTag ());
        factory.registerTag (new LocalImageTag ());
        mParser.setNodeFactory (factory);
        mCaptureResources = true;
        mFilter = null;
    }
View Full Code Here

     * with the end tag). The bug lies in NodeReader.readElement().
     * Creation date: (6/17/2001 4:01:06 PM)
     */
    public void testTextBug1() throws ParserException {
        createParser("<HTML><HEAD><TITLE>Google</TITLE>");
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        parseAndAssertNodeCount(5);
        // The fourth node should be a Text-  with the text - Google
        assertTrue("Fourth node should be a Text",node[3] instanceof Text);
        Text stringNode = (Text)node[3];
        assertEquals("Text of the Text","Google",stringNode.getText());
View Full Code Here

        assertEquals("http://asgard.ch",linkTag.getLink());
    }

    public void testToPlainTextString() throws ParserException {
        createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        parseAndAssertNodeCount(10);
        assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text);
        Text stringNode = (Text)node[3];
        assertEquals("First String Node","This is the Title",stringNode.toPlainTextString());
        assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text);
View Full Code Here

        assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString());
    }

    public void testToHTML() throws ParserException {
        createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>");
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        parseAndAssertNodeCount(10);
        assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text);
        Text stringNode = (Text)node[3];
        assertEquals("First String Node","This is the Title",stringNode.toHtml());
        assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text);
View Full Code Here

        createParser(
        "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+
        "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               \n"+
        "<br>"
        );
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        parseAndAssertNodeCount(4);
        assertTrue("Third Node identified must be a string node",node[2] instanceof Text);
    }
View Full Code Here

     */
    public void testStringBeingMissedBug() throws ParserException {
        createParser(
        "Before Comment <!-- Comment --> After Comment"
        );
        parser.setNodeFactory (new PrototypicalNodeFactory (true));
        parseAndAssertNodeCount(3);
        assertTrue("First node should be Text",node[0] instanceof Text);
        assertTrue("Second node should be Remark",node[1] instanceof Remark);
        assertTrue("Third node should be Text",node[2] instanceof Text);
        Text stringNode = (Text)node[0];
View Full Code Here

TOP

Related Classes of org.htmlparser.PrototypicalNodeFactory

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.