Package com.gentics.cr.lucene.indexer.transformer.html

Source Code of com.gentics.cr.lucene.indexer.transformer.html.HTMLEditorKitStripper

package com.gentics.cr.lucene.indexer.transformer.html;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.parser.ParserDelegator;

import com.gentics.cr.CRResolvableBean;
import com.gentics.cr.configuration.GenericConfiguration;
import com.gentics.cr.lucene.indexer.transformer.ContentTransformer;

/**
*
* Last changed: $Date: 2009-06-24 17:10:19 +0200 (Mi, 24 Jun 2009) $
* @version $Revision: 99 $
* @author $Author: supnig@constantinopel.at $
*
*/
public class HTMLEditorKitStripper extends ContentTransformer {

  /**
   * Create new instance of HTMLEdirtorKitStripper.
   * @param config
   */
  public HTMLEditorKitStripper(GenericConfiguration config) {
    super(config);

  }

  private static List<String> extractText(Reader reader) throws IOException {
    final ArrayList<String> list = new ArrayList<String>();

    ParserDelegator parserDelegator = new ParserDelegator();
    ParserCallback parserCallback = new ParserCallback() {
      public void handleText(final char[] data, final int pos) {
        list.add(new String(data));
      }

      public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos) {
      }

      public void handleEndTag(Tag t, final int pos) {
      }

      public void handleSimpleTag(Tag t, MutableAttributeSet a, final int pos) {
      }

      public void handleComment(final char[] data, final int pos) {
      }

      public void handleError(final java.lang.String errMsg, final int pos) {
      }
    };
    parserDelegator.parse(reader, parserCallback, true);
    return list;
  }

  private InputStream convertToInputStream(Object obj) {
    ByteArrayInputStream s;
    if (obj instanceof String) {
      s = new ByteArrayInputStream(((String) obj).getBytes());
    } else {
      throw new IllegalArgumentException("Parameter must be instance of String");
    }
    return s;
  }

  /**
   * @param obj
   */
  public Reader getContents(Object obj) {
    String s = getStringContents(obj);
    if (s != null) {
      return new StringReader(s);
    }
    return null;
  }

  private Reader tidy(InputStream is) {
    return TidyHelper.tidy(is);
  }

  private static String newline = System.getProperty("line.separator");

  /**
   * @param obj
   */
  public String getStringContents(Object obj) {

    String ret = "";
    List<String> lines;
    try {
      Reader r = tidy(convertToInputStream(obj));
      lines = HTMLEditorKitStripper.extractText(r);
      for (String line : lines) {
        ret += line + newline;
      }
    } catch (Exception e) {
      // Catch all exceptions to not disturb indexer
      e.printStackTrace();
    }
    return ret;
  }

  @Override
  public void processBean(CRResolvableBean bean) {
    // TODO Auto-generated method stub

  }

  @Override
  public void destroy() {
    // TODO Auto-generated method stub

  }
}
TOP

Related Classes of com.gentics.cr.lucene.indexer.transformer.html.HTMLEditorKitStripper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.