Package org.encog.parse.tags.read

Examples of org.encog.parse.tags.read.ReadHTML


   */
  public static String stripTags(final String str) {
    final ByteArrayInputStream is
    = new ByteArrayInputStream(str.getBytes());
    final StringBuilder result = new StringBuilder();
    final ReadHTML html = new ReadHTML(is);
    int ch;
    while ((ch = html.read()) != -1) {
      if (ch != 0) {
        result.append((char) ch);
      }
    }
    return result.toString();
View Full Code Here


  private Collection<URL> doSearch(final URL url) throws IOException {
    final Collection<URL> result = new ArrayList<URL>();
    // submit the search

    final InputStream is = url.openStream();
    final ReadHTML parse = new ReadHTML(is);
    final StringBuilder buffer = new StringBuilder();
    boolean capture = false;

    // parse the results
    int ch;
    while ((ch = parse.read()) != -1) {
      if (ch == 0) {
        final Tag tag = parse.getTag();
        if (tag.getName().equalsIgnoreCase("url")) {
          buffer.setLength(0);
          capture = true;
        } else if (tag.getName().equalsIgnoreCase("/url")) {
          result.add(new URL(buffer.toString()));
View Full Code Here

   *            The input stream that the data units are loaded from.
   */
  protected final void loadDataUnits(final InputStream is) {
    final StringBuilder text = new StringBuilder();
    int ch;
    final ReadHTML parse = new ReadHTML(is);
    boolean style = false;
    boolean script = false;

    while ((ch = parse.read()) != -1) {
      if (ch == 0) {

        if (style) {
          createCodeDataUnit(text.toString());
        } else if (script) {
          createCodeDataUnit(text.toString());
        } else {
          createTextDataUnit(text.toString());
        }
        style = false;
        script = false;

        text.setLength(0);
        createTagDataUnit(parse.getTag());
        if (parse.getTag().getName().equalsIgnoreCase("style")) {
          style = true;
        } else if (parse.getTag().getName().equalsIgnoreCase(
            "script")) {
          script = true;
        }
      } else {
        text.append((char) ch);
View Full Code Here

public class TestParseHTML extends TestCase {
  public void testAttributeLess() throws Throwable
  {
    String html = "12<b>12</b>1";
    ByteArrayInputStream bis = new ByteArrayInputStream(html.getBytes());
    ReadHTML parse = new ReadHTML(bis);
    TestCase.assertTrue(parse.read()=='1');
    TestCase.assertTrue(parse.read()=='2');
    TestCase.assertTrue(parse.read()==0);
    TestCase.assertTrue(parse.getTag().getName().equalsIgnoreCase("b"));
    TestCase.assertTrue(parse.getTag().getType()==Tag.Type.BEGIN);
    TestCase.assertTrue(parse.read()=='1');
    TestCase.assertTrue(parse.read()=='2');
    TestCase.assertTrue(parse.read()==0);
    Tag tag = parse.getTag();
    TestCase.assertTrue(tag.getName().equalsIgnoreCase("b"));
    TestCase.assertTrue(tag.getType()==Tag.Type.END);
    TestCase.assertEquals(tag.toString(),"</b>");
    TestCase.assertTrue(parse.read()=='1');
  }
View Full Code Here

 
  public void testAttributes() throws Throwable
  {
    String html="<img src=\"picture.gif\" alt=\"A Picture\">";
    ByteArrayInputStream bis = new ByteArrayInputStream(html.getBytes());
    ReadHTML parse = new ReadHTML(bis);
    TestCase.assertTrue(parse.read()==0);
    Tag tag = parse.getTag();
    TestCase.assertNotNull(tag);
    TestCase.assertTrue(tag.getName().equals("img"));
    //TestCase.assertTrue(html.equals(tag.toString()));
    TestCase.assertTrue(tag.getAttributeValue("src").equals("picture.gif"));
    TestCase.assertTrue(tag.getAttributeValue("alt").equals("A Picture"));
View Full Code Here

 
  public void testAttributesNoDELIM() throws Throwable
  {
    String html="<img src=picture.gif alt=APicture>";
    ByteArrayInputStream bis = new ByteArrayInputStream(html.getBytes());
    ReadHTML parse = new ReadHTML(bis);
    TestCase.assertTrue(parse.read()==0);
    Tag tag = parse.getTag();
    TestCase.assertNotNull(tag);
    TestCase.assertTrue(tag.getName().equals("img"));
    TestCase.assertTrue(tag.getAttributeValue("src").equals("picture.gif"));
    TestCase.assertTrue(tag.getAttributeValue("alt").equals("APicture"));
  }
View Full Code Here

  public void testBoth() throws Throwable
  {
    String html="<br/>";
    String htmlName = "br";
    ByteArrayInputStream bis = new ByteArrayInputStream(html.getBytes());
    ReadHTML parse = new ReadHTML(bis);
    TestCase.assertTrue(parse.read()==0);
    Tag tag = parse.getTag();
    TestCase.assertNotNull(tag);
    TestCase.assertTrue(tag.getType()==Tag.Type.BEGIN);
    TestCase.assertTrue(tag.getName().equals(htmlName));
    parse.readToTag();
    tag = parse.getTag();
    TestCase.assertNotNull(tag);
    TestCase.assertTrue(tag.getType()==Tag.Type.END);
    TestCase.assertTrue(tag.getName().equals(htmlName));
  }
View Full Code Here

 
  public void testBothWithAttributes() throws Throwable
  {
    String html="<img src=\"picture.gif\" alt=\"A Picture\"/>";
    ByteArrayInputStream bis = new ByteArrayInputStream(html.getBytes());
    ReadHTML parse = new ReadHTML(bis);
    TestCase.assertTrue(parse.read()==0);
  }
View Full Code Here

 
  public void testComment() throws Throwable
  {
    String html="a<!-- Hello -->b";
    ByteArrayInputStream bis = new ByteArrayInputStream(html.getBytes());
    ReadHTML parse = new ReadHTML(bis);
    TestCase.assertTrue(parse.read()=='a');
    TestCase.assertTrue(parse.read()==0);
    TestCase.assertTrue(parse.read()=='b');
  }
View Full Code Here

 
  public void testScript() throws Throwable
  {
    String html="a<script>12</script>b";
    ByteArrayInputStream bis = new ByteArrayInputStream(html.getBytes());
    ReadHTML parse = new ReadHTML(bis);
    TestCase.assertTrue(parse.read()=='a');
    TestCase.assertTrue(parse.read()==0);
    TestCase.assertTrue(parse.read()=='1');
    TestCase.assertTrue(parse.read()=='2');
    TestCase.assertTrue(parse.read()==0);
    TestCase.assertTrue(parse.read()=='b')
  }
View Full Code Here

TOP

Related Classes of org.encog.parse.tags.read.ReadHTML

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.