Package org.htmlparser.lexer

Examples of org.htmlparser.lexer.Lexer


        try {
            page = new Page(in, null);
        } catch (final UnsupportedEncodingException e) {
            throw new ScimpiException(e);
        }
        final Lexer lexer = new Lexer(page);

        Node node = null;
        try {
            Stack<Snippet> tags = allTags;
            String lineNumbers = "1";
            String template = null;
            tags.push(new HtmlSnippet(lineNumbers, filePath));

            // NOTE done like this the tags can be cached for faster processing
            while ((node = lexer.nextNode()) != null) {
                if (node instanceof Remark) {
                    // TODO need to pick up on comments within tags; at the moment this splits a tag into two causing a
                    // failure later
                    continue;
View Full Code Here


    return textCut(text, len, append);
  }

  public static String html2Text(String html, int len) {
    try {
      Lexer lexer = new Lexer(html);
      Node node;
      StringBuilder sb = new StringBuilder(html.length());
      while ((node = lexer.nextNode()) != null) {
        if (node instanceof TextNode) {
          sb.append(node.toHtml());
        }
        if (sb.length() > len) {
          break;
View Full Code Here

      searchArr[i] = k.getName();
      replacementArr[i] = k.getUrl();
      i++;
    }
    try {
      Lexer lexer = new Lexer(txt);
      Node node;
      StringBuilder sb = new StringBuilder((int) (txt.length() * 1.2));
      while ((node = lexer.nextNode()) != null) {
        if (node instanceof TextNode) {
          sb.append(StringUtils.replaceEach(node.toHtml(), searchArr,
              replacementArr));
        } else {
          sb.append(node.toHtml());
View Full Code Here

        if (null != mContentHandler)
            try
            {
                mParser = new Parser (
                    new Lexer (
                        new Page (
                            input.getByteStream (),
                            input.getEncoding ())));
                locator = new Locator (mParser);
                if (null != mErrorHandler)
View Full Code Here

     * @return An array of two URL arrays, index 0 is a list of images,
     * index 1 is a list of links to possibly follow.
     */
    protected URL[][] getImageLinks (final URL url)
    {
        Lexer lexer;
        URL[][] ret;

        if (null != url)
        {
            try
            {
                lexer = new Lexer (url.openConnection ());
                ret = extractImageLinks (lexer, url);
            }
            catch (Throwable t)
            {
                System.out.println (t.getMessage ());
View Full Code Here

     * Parses the given text to create the tag contents.
     * @param text A string of the form &lt;TAGNAME xx="yy"&gt;.
     */
    public void setText (String text)
    {
        Lexer lexer;
        TagNode output;
       
        lexer = new Lexer (text);
        try
        {
            output = (TagNode)lexer.nextNode ();
            mPage = output.getPage ();
            nodeBegin = output.getStartPosition ();
            nodeEnd = output.getEndPosition ();
            mAttributes = output.getAttributesEx ();
        }
View Full Code Here

     * @return The line number, or -1 if none is available.
     * @see #getColumnNumber
     */
    public int getLineNumber ()
    {
        Lexer lexer;
       
        lexer = mParser.getLexer ();
        return (lexer.getPage ().row (lexer.getCursor ()));
    }
View Full Code Here

     * @return The column number, or -1 if none is available.
     * @see #getLineNumber
     */
    public int getColumnNumber ()
    {
        Lexer lexer;
       
        lexer = mParser.getLexer ();
        return (lexer.getPage ().column (lexer.getCursor ()));
    }
View Full Code Here

    {
        Parser ret;

        if (null == html)
            throw new IllegalArgumentException ("html cannot be null");
        ret = new Parser (new Lexer (new Page (html, charset)));

        return (ret);
    }
View Full Code Here

     * @see #setLexer(Lexer)
     * @see #setConnection(URLConnection)
     */
    public Parser ()
    {
        // Delegate to the two-argument constructor, passing a Lexer built
        // over a Page created from the empty string and DEVNULL as the
        // second argument.
        this (new Lexer (new Page ("")), DEVNULL);
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.lexer.Lexer

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle, Inc. Contact coftware#gmail.com.