Package au.id.jericho.lib.html

Examples of au.id.jericho.lib.html.Source


    return wrapper;
  }
 
  private static String extractText(String htmlContent) {
    if (htmlContent != null && htmlContent.length() > 0) {
      Source source = new Source(htmlContent);
      TextExtractor extractor = new TextExtractor(source);
      extractor.setConvertNonBreakingSpaces(true);
      extractor.setExcludeNonHTMLElements(false);
      extractor.setIncludeAttributes(false);
      String output = extractor.toString();
View Full Code Here


  ) throws Exception{
   
    try{
      // TODO:New File Generate (Write.jsp, View.jsp)
     
      Source source=new Source(new String(definition));
      source.setLogWriter(new OutputStreamWriter(System.err)); // send
                                    // log
                                    // messages
                                    // to
                                    // stderr
      FormFields formFields=source.findFormFields();
      System.out.println("The document contains "+formFields.size()+" form fields:\n");
     
      StringBuffer sb = new StringBuffer();
      for (Iterator i=formFields.iterator(); i.hasNext();) {
        FormField formField=(FormField)i.next();
View Full Code Here

      }
    }
  }

  private void saveFormFilesForImport(String definition, String defVerId) throws Exception {
    Source source = new Source(new String(definition));
    source.setLogWriter(new OutputStreamWriter(System.err));
    FormFields formFields = source.findFormFields();

    StringBuffer sb = new StringBuffer();
    for (Iterator i = formFields.iterator(); i.hasNext();) {
      FormField formField = (FormField) i.next();
      formFields.addValue(formField.getName(), sb.toString());
      sb.setLength(0);
    }
   
    OutputDocument outputDocument = new OutputDocument(source);
    outputDocument.replace(formFields);

    String DEFINITION_ROOT = GlobalContext.getPropertyString("server.definition.path", "./uengine/definition/");

    String HTML_PATH = DEFINITION_ROOT + defVerId + ".html";
    OutputStreamWriter bw = null;
   
    try {
      bw = new OutputStreamWriter(new FileOutputStream(HTML_PATH), "UTF-8");
      bw.write(outputDocument.toString());
      bw.close();
    } catch (Exception e) {
      //throw e;
//      outputDocument = new OutputDocument(source);
      bw.write(source.toString());
      bw.close();
    } finally {
      if (bw != null)
        try {
          bw.close();
View Full Code Here

     *            CrawlURI we're processing.
     * @param cs
     *            Sequence from underlying ReplayCharSequence.
     */
    protected void extract(CrawlURI curi, CharSequence cs) {
        Source source = new Source(cs);
        List<Element> elements = source.findAllElements(StartTagType.NORMAL);
        for (Element element : elements) {
            String elementName = element.getName();
            Attributes attributes;
            if (elementName.equals(HTMLElementName.META)) {
                if (processMeta(curi, element)) {
View Full Code Here

  private Element element ;
  private HTag(Element element){
    this.element = element ;
  }
  /**
   * Creates a tag from one child element of the parsed content.
   *
   * Builds a {@code Source} from {@code content}, takes the entry at position
   * {@code index} of the list returned by {@code getChildElements()}, casts it to
   * {@code Element}, and delegates to the single-argument constructor.
   *
   * @param content reader supplying the markup handed to {@code Source}
   * @param index   position of the wanted entry in the child-element list
   * @throws IOException declared here; presumably raised while {@code Source}
   *         reads from {@code content} -- TODO confirm against the Source API
   */
  private HTag(Reader content, int index) throws IOException {
    // Kept as one expression: it is the argument of the this(...) delegation.
    this((Element)(new Source(content).getChildElements().get(index))) ;
  }
View Full Code Here

TOP

Related Classes of au.id.jericho.lib.html.Source

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.