Examples of MetaTag


Examples of org.htmlparser.tags.MetaTag

                + "</body>"
                + "</html>");
        parser.registerScanners();
        parseAndAssertNodeCount(10);
        assertType("fourth node", MetaTag.class, node[4]);
        MetaTag metaTag = (MetaTag) node[4];

        assertStringEquals(
            "content",
            "text/html; charset=iso-8859-1",
            metaTag.getAttribute("CONTENT"));
    }
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

     */
    public NodeIterator elements() throws ParserException
    {
        boolean remove_scanner;
        Node node;
        MetaTag meta;
        String httpEquiv;
        String charset;
        boolean restart;
        EndTag end;
        IteratorImpl ret;
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

        boolean remove_scanner,
        IteratorImpl ret)
        throws ParserException
    {
        Node node;
        MetaTag meta;
        String httpEquiv;
        String charset;
        EndTag end;
        if (null != url_conn)
            try
            {
                if (null == scanners.get("-m"))
                {
                    addScanner(new MetaTagScanner("-m"));
                    remove_scanner = true;
                }

                /* pre-read up to </HEAD> looking for charset directive */
                while (null != (node = ret.peek()))
                {
                    if (node instanceof MetaTag)
                    { // check for charset on Content-Type
                        meta = (MetaTag) node;
                        httpEquiv = meta.getAttribute("HTTP-EQUIV");
                        if ("Content-Type".equalsIgnoreCase(httpEquiv))
                        {
                            charset = getCharset(meta.getAttribute("CONTENT"));
                            if (!charset.equalsIgnoreCase(character_set))
                            { // oops, different character set, restart
                                character_set = charset;
                                recreateReader();
                                ret =
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

                 i.hasMoreNodes();)
            {
                Node node = i.nextNode();
                if (node instanceof MetaTag)
                {
                    MetaTag meta = (MetaTag) node;
                    if ( meta.getHttpEquiv() != null &&
                            meta.getHttpEquiv().equalsIgnoreCase(YADIS_XRDS_LOCATION) )
                    {
                        if ( xrdsLocation != null )
                            throw new YadisException(
                                "More than one " + YADIS_XRDS_LOCATION +
                                "META tags found in HEAD: " + head.toHtml(),
                                YadisResult.HTMLMETA_INVALID_RESPONSE);

                        xrdsLocation = meta.getMetaContent();
                        if (DEBUG)
                            _log.debug("Found " + YADIS_XRDS_LOCATION + "META tags.");
                    }
                }
            }
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

              _keywords.add(kw);
            }
          }
          */
        } else if ( n.getClass() == MetaTag.class ) {
          MetaTag t = (MetaTag)n;
          if (t.getAttribute("name") != null && t.getAttribute("name").equals("keywords") ) {
            String[] kws = t.getAttribute("content").split(",");
            for( String kw : kws ) {
              _keywords.add(kw);
            }
          }
        }
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

        String url = "http://htmlparser.sourceforge.net/test/gb2312Charset.html";
        int i;
        Node[] nodes;

        parser = new Parser(url);
        parser.setNodeFactory (new PrototypicalNodeFactory (new MetaTag ()));
        i = 0;
        nodes = new Node[30];
        for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
            nodes[i++] = e.nextNode();
        assertEquals ("Expected nodes", 23, i);
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

    {
        String url;
        int bookmark;
        NodeList list;
        NodeList robots;
        MetaTag robot;
        String content;
        File file;
        File dir;
        PrintWriter out;

        // get the next URL and add it to the done pile
        url = (String)mPages.remove (0);
        System.out.println ("processing " + url);
        mFinished.add (url);

        try
        {
            bookmark = mPages.size ();
            // fetch the page and gather the list of nodes
            mParser.setURL (url);
            try
            {
                list = new NodeList ();
                for (NodeIterator e = mParser.elements (); e.hasMoreNodes (); )
                    list.add (e.nextNode ()); // URL conversion occurs in the tags
            }
            catch (EncodingChangeException ece)
            {
                // fix bug #998195 SiteCapturer just crashed
                // try again with the encoding now set correctly
                // hopefully mPages, mImages, mCopied and mFinished won't be corrupted
                mParser.reset ();
                list = new NodeList ();
                for (NodeIterator e = mParser.elements (); e.hasMoreNodes (); )
                    list.add (e.nextNode ());
            }

            // handle robots meta tag according to http://www.robotstxt.org/wc/meta-user.html
            // <meta name="robots" content="index,follow" />
            // <meta name="robots" content="noindex,nofollow" />
            robots = list.extractAllNodesThatMatch (
                new AndFilter (
                    new NodeClassFilter (MetaTag.class),
                    new HasAttributeFilter ("name", "robots")), true);
            if (0 != robots.size ())
            {
                robot = (MetaTag)robots.elementAt (0);
                content = robot.getAttribute ("content").toLowerCase ();
                if ((-1 != content.indexOf ("none")) || (-1 != content.indexOf ("nofollow")))
                    // reset mPages
                    for (int i = bookmark; i < mPages.size (); i++)
                        mPages.remove (i);
                if ((-1 != content.indexOf ("none")) || (-1 != content.indexOf ("noindex")))
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

        registerTag (new ImageTag ());
        registerTag (new InputTag ());
        registerTag (new JspTag ());
        registerTag (new LabelTag ());
        registerTag (new LinkTag ());
        registerTag (new MetaTag ());
        registerTag (new ObjectTag ());
        registerTag (new OptionTag ());
        registerTag (new ParagraphTag ());
        registerTag (new ProcessingInstructionTag ());
        registerTag (new ScriptTag ());
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

        assertTrue("The HTML tag should have 3 nodes", 3 == htmlTag.getChildCount ());
        assertTrue("The first child should be a HEAD tag",htmlTag.getChild(0) instanceof HeadTag);
        HeadTag headTag = (HeadTag)htmlTag.getChild(0);
        assertTrue("The HEAD tag should have 2 nodes", 2 == headTag.getChildCount ());
        assertTrue("The second child should be a META tag",headTag.getChild(1) instanceof MetaTag);
        MetaTag metaTag = (MetaTag)headTag.getChild(1);

        assertStringEquals(
            "content",
            "text/html; charset=iso-8859-1",
            metaTag.getAttribute("CONTENT")
        );
    }
View Full Code Here

Examples of org.htmlparser.tags.MetaTag

            new PrototypicalNodeFactory (
                new Tag[]
                {
                    new TitleTag (),
                    new StyleTag (),
                    new MetaTag (),
                }));
        parseAndAssertNodeCount(7);
        assertTrue(node[2] instanceof TitleTag);
        // check the title node
        TitleTag titleTag = (TitleTag) node[2];
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.