Examples of org.htmlparser.beans.StringBean

org.htmlparser.beans.StringBean
etbeans.org"); // the HTTP is performed here String s = sb.getStrings (); @author Derrick Oswald. Created on December 23, 2002, 5:01 PM

      if (sTextBody==null) {
          // ****************************
          // Extract plain text from HTML
          if (DebugFile.trace) DebugFile.writeln("new StringBean()");


          StringBean oStrBn = new StringBean();


          try {
            oPrsr.visitAllNodesWith (oStrBn);
          } catch (ParserException pe) {
          throw new MessagingException(pe.getMessage(), pe);
          }


          sTextBody = oStrBn.getStrings();


          oStrBn = null;
      } // fi (sTextBody==null)


      // *******************************

View Full Code Here

          DebugFile.writeln(xcpt.getClass().getName()+" "+xcpt.getMessage()+" indexing message "+sGuid+" - "+sSubject);
      }
      if (oStrBuff.length()>0) {
        if (Gadgets.indexOfIgnoreCase(oStrBuff.toString(), "<html>")>=0) {
          Parser oPrsr = Parser.createParser(oStrBuff.toString(), null);
          StringBean oStrs = new StringBean();
          try {
            oPrsr.visitAllNodesWith (oStrs);
          } catch (ParserException pe) {
            if (DebugFile.trace) DebugFile.decIdent();
            throw new IOException(pe.getMessage());          
          }


          if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBean.getStrings())");
          sText = Gadgets.ASCIIEncode(oStrs.getStrings());
          if (DebugFile.trace) DebugFile.writeln("StringBean.getStrings() done");
        } // fi (oStrBuff contains <html>)
        else {
          if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBuffer.toString())");
          sText = Gadgets.ASCIIEncode(oStrBuff.toString());

View Full Code Here

    }
    else if (getContentType().startsWith("text/html")) {
      StringBuffer oHtmlBuff = new StringBuffer();
      getText(oHtmlBuff);
      Parser oPrsr = Parser.createParser(oHtmlBuff.toString(), getEncoding());
      StringBean oStrBn = new StringBean();
      try {
        oPrsr.visitAllNodesWith (oStrBn);
      } catch (ParserException pe) {
        throw new MessagingException(pe.getMessage(), pe);
      }
      // Code for HTML parser 1.4
      // oStrBn.setInputHTML(oHtmlBuff.toString());
      oBuffer.append(oStrBn.getStrings());
    }
    else {
      if (DebugFile.trace) DebugFile.writeln("Multipart = DBMimeMessage.getParts()");


      Multipart oParts = getParts();


      final int iParts = oParts.getCount();


      MimePart oPart;


      int p;
      for (p=0; p<iParts && !bHasPlainTextVersion; p++) {
        oPart = (MimePart) oParts.getBodyPart(p);


        String sType = oPart.getContentType();
        if (null!=sType) sType=sType.toLowerCase();
        String sDisp = oPart.getDisposition();
        if (null==sDisp) sDisp="inline"; else if (sDisp.length()==0) sDisp="inline";


        if (DebugFile.trace) DebugFile.writeln("scanning part " + String.valueOf(p) + sDisp + " " + sType.replace('\r',' ').replace('\n', ' '));


        if (sType.startsWith("text/plain") && sDisp.equalsIgnoreCase("inline")) {
          bHasPlainTextVersion = true;
          DBMimePart.parseMimePart (oBuffer, null,
                                    getFolder().getName(),
                                    getMessageID()!=null ? getMessageID() : getContentID(),
                                    oPart, p);
        }
      }


      if (DebugFile.trace) {
        if (bHasPlainTextVersion)
          DebugFile.writeln("MimeMultipart has plain text version at part " + String.valueOf(p));
        else
          DebugFile.writeln("MimeMultipart has no plain text version, converting part 0 from HTML");
      }


      if (!bHasPlainTextVersion) {
        oPart = (MimePart) oParts.getBodyPart(0);
        StringBuffer oHtml = new StringBuffer();
        DBMimePart.parseMimePart (oHtml, null, getFolder().getName(), getMessageID()!=null ? getMessageID() : getContentID(), oPart, 0);


        Parser oPrsr = Parser.createParser(oHtml.toString(), getEncoding());
        StringBean oStrBn = new StringBean();


        try {
          oPrsr.visitAllNodesWith (oStrBn);
        } catch (ParserException pe) {
          throw new MessagingException(pe.getMessage(), pe);
        }


        // Code for HTML parser 1.4
        // oSB.setInputHTML(oHtml.toString());


        String sStrs = oStrBn.getStrings();


        if (DebugFile.trace) {
          DebugFile.writeln("StringBean.getStrings(");
          if (null!=sStrs) DebugFile.write(sStrs); else DebugFile.write("null");
          DebugFile.writeln(")");

View Full Code Here


        // ****************************
        // Extract plain text from HTML
        if (DebugFile.trace) DebugFile.writeln("new StringBean()");


        StringBean oStrBn = new StringBean();


        try {
          oPrsr.visitAllNodesWith (oStrBn);
        } catch (ParserException pe) {
          if (DebugFile.trace) {
            DebugFile.writeln("org.htmlparser.util.ParserException " + pe.getMessage());
          }
          throw new MessagingException(pe.getMessage(), pe);
        }


        sText = oStrBn.getStrings();


        oStrBn = null;


        // *******************************
        // Set plain text alternative part

View Full Code Here


    // Using HTMLParser to extract the content
    String cleanedContent = null;
    Page htmlPage = new Page(cuttedContent, "UTF-8");
    Parser parser = new Parser(new Lexer(htmlPage));
    StringBean stringBean = new StringBean();


    // replace multiple whitespace with one whitespace
    stringBean.setCollapse(true);
    // Do not extract URLs
    stringBean.setLinks(false);
    // replace &nbsp; with whitespace
    stringBean.setReplaceNonBreakingSpaces(true);


    try {
      // Parse the content
      parser.visitAllNodesWith(stringBean);
      cleanedContent = stringBean.getStrings();


    } catch (ParserException ex) {
      throw new RegainException("Error while parsing content: ", ex);
    }

View Full Code Here

            ResourcePropertyMapping resourcePropertyMapping,
            MarshallingContext context) {
        String str = (String) o;
        Lexer l = new Lexer(str);
        Parser parser = new Parser(l);
        StringBean sb = new StringBean();


        try {
            parser.visitAllNodesWith(sb);
        } catch (ParserException e) {
            log.warn("RETURNING ORIG VAL: " + str);
            return str;
        }
        String ret = sb.getStrings();
        log.debug("RETURNING STRIPPED: " + ret);
        return ret;
    }

View Full Code Here

                stringBuilder.append(s);
            }


            Lexer l = new Lexer(stringBuilder.toString());
            Parser parser = new Parser(l);
            StringBean sb = new StringBean();


            parser.visitAllNodesWith(sb);


            String ret = sb.getStrings();
            return new StringReader(ret);
        } catch (ParserException e) {
            log.warn("Conversion Exception: " + e);
            throw new ConversionException(e.getMessage());
        } catch (IOException e2) {

View Full Code Here

     * @param links if <code>true</code> include hyperlinks in output.
     * @return The textual contents of the page.
     */
    public String extractStrings(boolean links) throws ParserException
    {
        StringBean sb;


        sb = new StringBean();
        sb.setLinks(links);
        sb.setURL(resource);


        return (sb.getStrings());
    }

View Full Code Here

    }


    public void testSerializableStringBean()
        throws IOException, ClassNotFoundException, ParserException
    {
        StringBean sb;
        String text;
        byte[] data;


        sb = new StringBean();
        sb.setURL("http://htmlparser.sourceforge.net/test/example.html");
        text = sb.getStrings();


        data = pickle(sb);
        sb = (StringBean) unpickle(data);


        assertEquals(
            "Strings before and after serialization differ",
            text,
            sb.getStrings());
    }

View Full Code Here

        }
    }


    public void testStringBeanListener()
    {
        final StringBean sb;
        final Boolean hit[] = new Boolean[1];


        sb = new StringBean();
        hit[0] = Boolean.FALSE;
        sb.addPropertyChangeListener(new PropertyChangeListener()
        {
            public void propertyChange(PropertyChangeEvent event)
            {
                if (event.getSource().equals(sb))
                    if (event
                        .getPropertyName()
                        .equals(StringBean.PROP_STRINGS_PROPERTY))
                        hit[0] = Boolean.TRUE;
            }
        });


        hit[0] = Boolean.FALSE;
        sb.setURL("http://htmlparser.sourceforge.net/test/example.html");
        assertTrue(
            "Strings property change not fired for URL change",
            hit[0].booleanValue());


        hit[0] = Boolean.FALSE;
        sb.setLinks(true);
        assertTrue(
            "Strings property change not fired for links change",
            hit[0].booleanValue());
    }

View Full Code Here

0 1 2 3

TOP

Related Classes of org.htmlparser.beans.StringBean

com.apress.progwt.server.lucene.HTMLAnalyzer

com.apress.progwt.server.lucene.HTMLConverter

com.knowgate.hipermail.DBMimeMessage

com.knowgate.hipermail.SessionHandler

com.knowgate.lucene.MailIndexer

com.wordpress.util.StringUtil

com.zesped.util.MailSessionHandler

net.sf.regain.crawler.preparator.HtmlPreparator

org.exoplatform.services.document.impl.HTMLDocumentReader

org.htmlparser.Parser

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.