Package org.apache.cocoon.transformation

Source Code of org.apache.cocoon.transformation.PatternTransformer

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.transformation;

import net.sourceforge.chaperon.build.LexicalAutomatonBuilder;
import net.sourceforge.chaperon.common.Decoder;
import net.sourceforge.chaperon.model.lexicon.Lexicon;
import net.sourceforge.chaperon.model.lexicon.LexiconFactory;
import net.sourceforge.chaperon.process.LexicalAutomaton;
import net.sourceforge.chaperon.process.PatternProcessor;

import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.logger.LogEnabled;
import org.apache.avalon.framework.logger.Logger;
import org.apache.avalon.framework.parameters.ParameterException;
import org.apache.avalon.framework.parameters.Parameterizable;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;

import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.xml.XMLUtils;
import org.apache.cocoon.caching.CacheableProcessingComponent;
import org.apache.cocoon.components.source.SourceUtil;
import org.apache.cocoon.environment.SourceResolver;

//import org.apache.commons.logging.impl.AvalonLogger;

import org.apache.excalibur.source.Source;
import org.apache.excalibur.source.SourceException;
import org.apache.excalibur.source.SourceValidity;
import org.apache.excalibur.store.Store;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import java.io.IOException;
import java.io.Serializable;

import java.util.Map;

/**
* This transformer transforms text patterns of an XML file into lexemes by using a lexicon file.
*
* <p>
* Input:
* </p>
* <pre>
* &lt;section&gt;
*  Text 123 bla
* &lt;/section&gt;
* </pre>
*
* <p>
* can be transformed into the following output:
* </p>
* <pre>
* &lt;section&gt;
*  Text
*  &lt;lexeme symbol="number" text="123"/&gt;
*  bla
* &lt;/section&gt;
* </pre>
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels</a>
* @version $Id: PatternTransformer.java 433543 2006-08-22 06:22:54Z crossley $
*/
public class PatternTransformer extends AbstractTransformer
                                implements LogEnabled, Serviceable, Recyclable,
                                           Disposable, Parameterizable, CacheableProcessingComponent {

  /** Namespace for the SAX events. */
  public static final String NS = "http://chaperon.sourceforge.net/schema/lexemes/2.0";
  // URI of the lexicon, taken from the src argument of setup().
  private String lexicon = null;
  // Source resolved from the lexicon URI in setup(); released in recycle() and dispose().
  private Source lexiconSource = null;
  // Logger received through enableLogging(); used for the info messages in setup().
  private Logger logger = null;
  // Service manager received through service(); used in setup() to look up the transient store.
  private ServiceManager manager = null;
  // Resolver handed to setup(); needed later to release lexiconSource.
  private SourceResolver resolver = null;
  // Lexical automaton built from the lexicon (or fetched from the store) in setup().
  private LexicalAutomaton automaton = null;
  // Matcher that search() points at one lexeme definition of the automaton at a time.
  private PatternProcessor processor = new PatternProcessor();
  // Value of the "groups" parameter read in parameterize().
  // NOTE(review): search() declares a local String[] named "groups" that shadows this field.
  private boolean groups = false;
  // Collects the text of characters() and ignorableWhitespace() events until search() runs.
  private StringBuffer buffer = new StringBuffer();
  // Scratch buffer that search() resets to length 0 before scanning the collected text.
  private StringBuffer output = new StringBuffer();

  /**
   * Provide component with a logger.
   *
   * @param logger the logger
   */
  public void enableLogging(Logger logger)
  {
    this.logger = logger;
  }

  /**
   * Pass the ServiceManager to the Serviceable. The Serviceable implementation should use the
   * specified ServiceManager to acquire the services it needs for execution.
   *
   * @param manager The ServiceManager which this Serviceable uses.
   */
  public void service(ServiceManager manager)
  {
    this.manager = manager;
  }

  /**
   * Provide component with parameters.
   *
   * @param parameters the parameters
   *
   * @throws ParameterException if parameters are invalid
   */
  public void parameterize(Parameters parameters) throws ParameterException
  {
    groups = parameters.getParameterAsBoolean("groups", false);
  }

  /**
   * Set the SourceResolver, objectModel Map, the source and sitemap Parameters used to process the
   * request.
   *
   * @param resolver Source resolver
   * @param objectmodel Object model
   * @param src Source
   * @param parameters Parameters
   *
   * @throws IOException
   * @throws ProcessingException
   * @throws SAXException
   */
  public void setup(SourceResolver resolver, Map objectmodel, String src, Parameters parameters)
    throws ProcessingException, SAXException, IOException
  {
    this.resolver = resolver;

    Store store = null;

    try
    {
      this.lexicon = src;

      this.lexiconSource = resolver.resolveURI(this.lexicon);

      // Retrieve the lexical automaton from the transient store
      store = (Store)this.manager.lookup(Store.TRANSIENT_STORE);

      LexicalAutomatonEntry entry = (LexicalAutomatonEntry)store.get(this.lexiconSource.getURI());

      // If the lexicon has changed, rebuild the lexical automaton
      if ((entry==null) || (entry.getValidity()==null) ||
          (entry.getValidity().isValid(this.lexiconSource.getValidity())<=0))
      {
        this.logger.info("(Re)building the automaton from '"+this.lexiconSource.getURI()+"'");

        if (this.lexiconSource.getInputStream()==null)
          throw new ProcessingException("Source '"+this.lexiconSource.getURI()+"' not found");

        LexiconFactory factory = new LexiconFactory();
        SourceUtil.toSAX(this.manager, this.lexiconSource, null, factory);

        Lexicon lexicon = factory.getLexicon();

        LexicalAutomatonBuilder builder =
          new LexicalAutomatonBuilder(lexicon/*, new AvalonLogger(this.logger)*/);

        this.automaton = builder.getLexicalAutomaton();

        this.logger.info("Store automaton into store for '"+this.lexiconSource.getURI()+"'");
        store.store(this.lexiconSource.getURI(),
                    new LexicalAutomatonEntry(this.automaton, this.lexiconSource.getValidity()));
      }
      else
      {
        this.logger.info("Getting automaton from store for '"+this.lexiconSource.getURI()+"'");
        this.automaton = entry.getLexicalAutomaton();
      }
    }
    catch (SourceException se)
    {
      throw new ProcessingException("Error during resolving of '"+src+"'.", se);
    }
    catch (ServiceException se)
    {
      throw new ProcessingException("Could not lookup for component", se);
    }
    finally
    {
      if (store!=null)
        this.manager.release(store);
    }
  }

  /**
   * Generate the unique key. This key must be unique inside the space of this component.
   *
   * @return The generated key hashes the src
   */
  public Serializable getKey()
  {
    return this.lexiconSource.getURI();
  }

  /**
   * Generate the validity object.
   *
   * @return The generated validity object or <code>null</code> if the component is currently not
   *         cacheable.
   */
  public SourceValidity getValidity()
  {
    return this.lexiconSource.getValidity();
  }

  /**
   * Recycle this component. All instance variables are set to <code>null</code>.
   */
  public void recycle()
  {
    if ((this.resolver!=null) && (this.lexiconSource!=null))
    {
      this.resolver.release(this.lexiconSource);
      this.lexiconSource = null;
    }

    this.automaton = null;
    super.recycle();
  }

  /**
   * The dispose operation is called at the end of a components lifecycle.
   */
  public void dispose()
  {
    if ((this.resolver!=null) && (this.lexiconSource!=null))
    {
      this.resolver.release(this.lexiconSource);
      this.lexiconSource = null;
    }

    this.manager = null;
  }

  /**
   * Receive notification of the beginning of an element.
   *
   * @param uri The Namespace URI, or the empty string if the element has no Namespace URI or if
   *        Namespace processing is not being performed.
   * @param loc The local name (without prefix), or the empty string if Namespace processing is not
   *        being performed.
   * @param raw The raw XML 1.0 name (with prefix), or the empty string if raw names are not
   *        available.
   * @param a The attributes attached to the element. If there are no attributes, it shall be an
   *        empty Attributes object.
   *
   * @throws SAXException
   */
  public void startElement(String uri, String loc, String raw, Attributes a)
    throws SAXException
  {
    search();

    if (contentHandler!=null)
      contentHandler.startElement(uri, loc, raw, a);
  }

  /**
   * Receive notification of the end of an element.
   *
   * @param uri The Namespace URI, or the empty string if the element has no Namespace URI or if
   *        Namespace processing is not being performed.
   * @param loc The local name (without prefix), or the empty string if Namespace processing is not
   *        being performed.
   * @param raw The raw XML 1.0 name (with prefix), or the empty string if raw names are not
   *        available.
   *
   * @throws SAXException
   */
  public void endElement(String uri, String loc, String raw)
    throws SAXException
  {
    search();

    if (contentHandler!=null)
      contentHandler.endElement(uri, loc, raw);
  }

  /**
   * Receive notification of character data.
   *
   * @param c The characters from the XML document.
   * @param start The start position in the array.
   * @param len The number of characters to read from the array.
   *
   * @throws SAXException
   */
  public void characters(char[] c, int start, int len)
    throws SAXException
  {
    buffer.append(c, start, len);
  }

  /**
   * Receive notification of ignorable whitespace in element content.
   *
   * @param c The characters from the XML document.
   * @param start The start position in the array.
   * @param len The number of characters to read from the array.
   *
   * @throws SAXException
   */
  public void ignorableWhitespace(char[] c, int start, int len)
    throws SAXException
  {
    buffer.append(c, start, len);
  }

  /**
   * Receive notification of a processing instruction.
   *
   * @param target The processing instruction target.
   * @param data The processing instruction data, or null if none was supplied.
   *
   * @throws SAXException
   */
  public void processingInstruction(String target, String data)
    throws SAXException
  {
    search();

    if (contentHandler!=null)
      contentHandler.processingInstruction(target, data);
  }

  /**
   * Report an XML comment anywhere in the document.
   *
   * @param ch An array holding the characters in the comment.
   * @param start The starting position in the array.
   * @param len The number of characters to use from the array.
   *
   * @throws SAXException
   */
  public void comment(char[] ch, int start, int len) throws SAXException
  {
    search();

    if (lexicalHandler!=null)
      lexicalHandler.comment(ch, start, len);
  }

  /**
   * @throws SAXException
   */
  private void search() throws SAXException
  {
    if (buffer.length()<=0)
      return;

    char[] text = buffer.toString().toCharArray();

    String lexemesymbol;
    String lexemetext;
    String[] groups = null;
    int lexemeindex = 0;
    int position = 0;

    output.setLength(0);
    do
    {
      lexemesymbol = null;
      lexemetext = null;

      for (lexemeindex = automaton.getLexemeCount()-1; lexemeindex>=0; lexemeindex--)
      {
        processor.setPatternAutomaton(automaton.getLexemeDefinition(lexemeindex));

        if ((processor.match(text, position)) &&
            ((lexemetext==null) || (processor.getGroup().length()>=lexemetext.length())))
        {
          lexemesymbol = automaton.getLexemeSymbol(lexemeindex);
          lexemetext = processor.getGrou
TOP

Related Classes of org.apache.cocoon.transformation.PatternTransformer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.