Package org.htmlparser.beans

Examples of org.htmlparser.beans.StringBean


      if (sTextBody==null) {
          // ****************************
          // Extract plain text from HTML
          if (DebugFile.trace) DebugFile.writeln("new StringBean()");

          StringBean oStrBn = new StringBean();

          try {
            oPrsr.visitAllNodesWith (oStrBn);
          } catch (ParserException pe) {
          throw new MessagingException(pe.getMessage(), pe);
          }

          sTextBody = oStrBn.getStrings();

          oStrBn = null;
      } // fi (sTextBody==null)

      // *******************************
 
View Full Code Here


          DebugFile.writeln(xcpt.getClass().getName()+" "+xcpt.getMessage()+" indexing message "+sGuid+" - "+sSubject);
      }
      if (oStrBuff.length()>0) {
        if (Gadgets.indexOfIgnoreCase(oStrBuff.toString(), "<html>")>=0) {
          Parser oPrsr = Parser.createParser(oStrBuff.toString(), null);
          StringBean oStrs = new StringBean();
          try {
            oPrsr.visitAllNodesWith (oStrs);
          } catch (ParserException pe) {
            if (DebugFile.trace) DebugFile.decIdent();
            throw new IOException(pe.getMessage());         
          }

          if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBean.getStrings())");
          sText = Gadgets.ASCIIEncode(oStrs.getStrings());
          if (DebugFile.trace) DebugFile.writeln("StringBean.getStrings() done");
        } // fi (oStrBuff contains <html>)
        else {
          if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBuffer.toString())");
          sText = Gadgets.ASCIIEncode(oStrBuff.toString());
View Full Code Here

    }
    else if (getContentType().startsWith("text/html")) {
      StringBuffer oHtmlBuff = new StringBuffer();
      getText(oHtmlBuff);
      Parser oPrsr = Parser.createParser(oHtmlBuff.toString(), getEncoding());
      StringBean oStrBn = new StringBean();
      try {
        oPrsr.visitAllNodesWith (oStrBn);
      } catch (ParserException pe) {
        throw new MessagingException(pe.getMessage(), pe);
      }
      // Code for HTML parser 1.4
      // oStrBn.setInputHTML(oHtmlBuff.toString());
      oBuffer.append(oStrBn.getStrings());
    }
    else {
      if (DebugFile.trace) DebugFile.writeln("Multipart = DBMimeMessage.getParts()");

      Multipart oParts = getParts();

      final int iParts = oParts.getCount();

      MimePart oPart;

      int p;
      for (p=0; p<iParts && !bHasPlainTextVersion; p++) {
        oPart = (MimePart) oParts.getBodyPart(p);

        String sType = oPart.getContentType();
        if (null!=sType) sType=sType.toLowerCase();
        String sDisp = oPart.getDisposition();
        if (null==sDisp) sDisp="inline"; else if (sDisp.length()==0) sDisp="inline";

        if (DebugFile.trace) DebugFile.writeln("scanning part " + String.valueOf(p) + sDisp + " " + sType.replace('\r',' ').replace('\n', ' '));

        if (sType.startsWith("text/plain") && sDisp.equalsIgnoreCase("inline")) {
          bHasPlainTextVersion = true;
          DBMimePart.parseMimePart (oBuffer, null,
                                    getFolder().getName(),
                                    getMessageID()!=null ? getMessageID() : getContentID(),
                                    oPart, p);
        }
      }

      if (DebugFile.trace) {
        if (bHasPlainTextVersion)
          DebugFile.writeln("MimeMultipart has plain text version at part " + String.valueOf(p));
        else
          DebugFile.writeln("MimeMultipart has no plain text version, converting part 0 from HTML");
      }

      if (!bHasPlainTextVersion) {
        oPart = (MimePart) oParts.getBodyPart(0);
        StringBuffer oHtml = new StringBuffer();
        DBMimePart.parseMimePart (oHtml, null, getFolder().getName(), getMessageID()!=null ? getMessageID() : getContentID(), oPart, 0);

        Parser oPrsr = Parser.createParser(oHtml.toString(), getEncoding());
        StringBean oStrBn = new StringBean();

        try {
          oPrsr.visitAllNodesWith (oStrBn);
        } catch (ParserException pe) {
          throw new MessagingException(pe.getMessage(), pe);
        }

        // Code for HTML parser 1.4
        // oSB.setInputHTML(oHtml.toString());

        String sStrs = oStrBn.getStrings();

        if (DebugFile.trace) {
          DebugFile.writeln("StringBean.getStrings(");
          if (null!=sStrs) DebugFile.write(sStrs); else DebugFile.write("null");
          DebugFile.writeln(")");
View Full Code Here

        // ****************************
        // Extract plain text from HTML
        if (DebugFile.trace) DebugFile.writeln("new StringBean()");

        StringBean oStrBn = new StringBean();

        try {
          oPrsr.visitAllNodesWith (oStrBn);
        } catch (ParserException pe) {
          if (DebugFile.trace) {
            DebugFile.writeln("org.htmlparser.util.ParserException " + pe.getMessage());
          }
          throw new MessagingException(pe.getMessage(), pe);
        }

        sText = oStrBn.getStrings();

        oStrBn = null;

        // *******************************
        // Set plain text alternative part
View Full Code Here

    // Using HTMLParser to extract the content
    String cleanedContent = null;
    Page htmlPage = new Page(cuttedContent, "UTF-8");
    Parser parser = new Parser(new Lexer(htmlPage));
    StringBean stringBean = new StringBean();

    // replace multiple whitespace with one whitespace
    stringBean.setCollapse(true);
    // Do not extract URLs
    stringBean.setLinks(false);
    // replace &nbsp; with whitespace
    stringBean.setReplaceNonBreakingSpaces(true);

    try {
      // Parse the content
      parser.visitAllNodesWith(stringBean);
      cleanedContent = stringBean.getStrings();

    } catch (ParserException ex) {
      throw new RegainException("Error while parsing content: ", ex);
    }
View Full Code Here

            ResourcePropertyMapping resourcePropertyMapping,
            MarshallingContext context) {
        String str = (String) o;
        Lexer l = new Lexer(str);
        Parser parser = new Parser(l);
        StringBean sb = new StringBean();

        try {
            parser.visitAllNodesWith(sb);
        } catch (ParserException e) {
            log.warn("RETURNING ORIG VAL: " + str);
            return str;
        }
        String ret = sb.getStrings();
        log.debug("RETURNING STRIPPED: " + ret);
        return ret;
    }
View Full Code Here

                stringBuilder.append(s);
            }

            Lexer l = new Lexer(stringBuilder.toString());
            Parser parser = new Parser(l);
            StringBean sb = new StringBean();

            parser.visitAllNodesWith(sb);

            String ret = sb.getStrings();
            return new StringReader(ret);
        } catch (ParserException e) {
            log.warn("Conversion Exception: " + e);
            throw new ConversionException(e.getMessage());
        } catch (IOException e2) {
View Full Code Here

     * @param links if <code>true</code> include hyperlinks in output.
     * @return The textual contents of the page.
     */
    public String extractStrings(boolean links) throws ParserException
    {
        StringBean sb;

        sb = new StringBean();
        sb.setLinks(links);
        sb.setURL(resource);

        return (sb.getStrings());
    }
View Full Code Here

    }

    public void testSerializableStringBean()
        throws IOException, ClassNotFoundException, ParserException
    {
        StringBean sb;
        String text;
        byte[] data;

        sb = new StringBean();
        sb.setURL("http://htmlparser.sourceforge.net/test/example.html");
        text = sb.getStrings();

        data = pickle(sb);
        sb = (StringBean) unpickle(data);

        assertEquals(
            "Strings before and after serialization differ",
            text,
            sb.getStrings());
    }
View Full Code Here

        }
    }

    public void testStringBeanListener()
    {
        final StringBean sb;
        final Boolean hit[] = new Boolean[1];

        sb = new StringBean();
        hit[0] = Boolean.FALSE;
        sb.addPropertyChangeListener(new PropertyChangeListener()
        {
            public void propertyChange(PropertyChangeEvent event)
            {
                if (event.getSource().equals(sb))
                    if (event
                        .getPropertyName()
                        .equals(StringBean.PROP_STRINGS_PROPERTY))
                        hit[0] = Boolean.TRUE;
            }
        });

        hit[0] = Boolean.FALSE;
        sb.setURL("http://htmlparser.sourceforge.net/test/example.html");
        assertTrue(
            "Strings property change not fired for URL change",
            hit[0].booleanValue());

        hit[0] = Boolean.FALSE;
        sb.setLinks(true);
        assertTrue(
            "Strings property change not fired for links change",
            hit[0].booleanValue());
    }
View Full Code Here

TOP

Related Classes of org.htmlparser.beans.StringBean

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.