Package net.sourceforge.chaperon.model.lexicon

Source Code of net.sourceforge.chaperon.model.lexicon.LexiconFactory

/*
*  Copyright (C) Chaperon. All rights reserved.
*  -------------------------------------------------------------------------
*  This software is published under the terms of the Apache Software License
*  version 1.1, a copy of which has been included  with this distribution in
*  the LICENSE file.
*/

package net.sourceforge.chaperon.model.lexicon;

import net.sourceforge.chaperon.model.pattern.*;
import net.sourceforge.chaperon.model.symbol.Terminal;

import org.xml.sax.*;
import org.xml.sax.helpers.*;

import java.util.Stack;

/**
* This class should generate a lexicon from a SAX stream
*
* @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
* @version CVS $Id: LexiconFactory.java,v 1.3 2003/12/09 19:55:52 benedikta Exp $
*/
public class LexiconFactory extends DefaultHandler
{
  /** The namspace of the lexicon configuration */
  public static final String NS = "http://chaperon.sourceforge.net/schema/lexicon/1.0";

  /** Element name */
  public static final String LEXEME_ELEMENT = "lexeme";

  /** Attribute name of the symbol property */
  public static final String SYMBOL_ATTRIBUTE = "symbol";

  /** Element name */
  public static final String LEXICON_ELEMENT = "lexicon";

  /** Element name */
  public static final String ALTERNATION_ELEMENT = "alt";

  /** Element name */
  public static final String BEGINOFLINE_ELEMENT = "bol";

  /** Element name */
  public static final String CHARACTERCLASS_ELEMENT = "cclass";

  /** Attribute name of the exclusive property */
  public static final String EXCLUSIVE_ATTRIBUTE = "exclusive";

  /** Element name */
  public static final String CHARACTERINTERVAL_ELEMENT = "cinterval";

  /** Attribute name of the min property */
  public static final String CHARACTERINTERVAL_MIN_ATTRIBUTE = "min";

  /** Attribute name of the max property */
  public static final String CHARACTERINTERVAL_MAX_ATTRIBUTE = "max";

  /** Element name */
  public static final String CHARACTERSTRING_ELEMENT = "cstring";

  /** Attribute name of the sequence property */
  public static final String CHARACTERSTRING_SEQUENCE_ATTRIBUTE = "content";

  /** Element name */
  public static final String CHARACTERSET_ELEMENT = "cset";

  /** Attribute name of the characters property */
  public static final String CHARACTERSET_CHARACTERS_ATTRIBUTE = "content";

/*  public final static String CHARACTERGENERIC_ELEMENT = "cgeneric";

  public final static String CHARACTERGENERIC_CODE_ATTRIBUTE = "code";*/
  public static final String CODE_ATTRIBUTE = "code";

  /** Element name */
  public static final String CONCATENATION_ELEMENT = "concat";

  /** Element name */
  public static final String GROUP_ELEMENT = "group";

  /** Element name */
  public static final String UNIVERSALCHARACTER_ELEMENT = "cuniversal";

  /** Element name */
  public static final String ENDOFLINE_ELEMENT = "eol";

  /** Attribute name of the minOccurs property */
  public static final String MINOCCURS_ATTRIBUTE = "minOccurs";

  /** Attribute name of the minOccurs property */
  public static final String MAXOCCURS_ATTRIBUTE = "maxOccurs";
  private static final int STATE_OUTER = 0;
  private static final int STATE_LEXICON = 1;
  private static final int STATE_LEXEME = 2;
  private static final int STATE_CHARACTERCLASS = 3;
  private static final int STATE_CHARACTERCLASSELEMENT = 4;
  private int state = STATE_OUTER;
  private Lexicon lexicon;
  private Locator locator = null;
  private Stack stack;

  /**
   * Returns the generated lexicon
   *
   * @return Lexicon
   */
  public Lexicon getLexicon()
  {
    return lexicon;
  }

  private String getLocation()
  {
    if (locator==null)
      return "unknown";

    return locator.getSystemId()+":"+locator.getLineNumber()+":"+locator.getColumnNumber();
  }

  /**
   * Receive an object for locating the origin of SAX document events.
   */
  public void setDocumentLocator(Locator locator)
  {
    this.locator = locator;
  }

  /**
   * Receive notification of the beginning of a document.
   */
  public void startDocument()
  {
    stack = new Stack();
  }

  /**
   * Return the content of the minOccurs attribute
   *
   * @param atts Attributes of an element
   *
   * @return minOccurs attribute
   */
  private int getMinOccursFromAttributes(Attributes atts)
  {
    int minOccurs = 1;
    String attribute = atts.getValue(MINOCCURS_ATTRIBUTE);

    if ((attribute!=null) && (attribute.length()>0))
    {
      try
      {
        minOccurs = Integer.parseInt(attribute);
      }
      catch (NumberFormatException e)
      {
        // System.err.println("error: "+attribute+" ist not an integer number");
        minOccurs = 1;
      }

      if (minOccurs<0)
        minOccurs = 0;
    }

    return minOccurs;
  }

  /**
   * Return the content of the maxOccurs attribute
   *
   * @param atts Attributes of an element
   *
   * @return maxOccurs attribute
   */
  private int getMaxOccursFromAttributes(Attributes atts)
  {
    int maxOccurs = 1;
    String attribute = atts.getValue(MAXOCCURS_ATTRIBUTE);

    if ((attribute!=null) && (attribute.length()>0))
    {
      if (attribute.equals("*"))
        maxOccurs = Integer.MAX_VALUE;
      else
      {
        try
        {
          maxOccurs = Integer.parseInt(attribute);
        }
        catch (NumberFormatException e)
        {
          // System.err.println("error: "+attribute+" ist not an integer number");
          maxOccurs = 1;
        }

        if (maxOccurs<1)
          maxOccurs = 1;
      }
    }

    return maxOccurs;
  }

  /**
   * @param atts
   *
   * @return
   */
  private boolean getExclusiveFromAttributes(Attributes atts)
  {
    String attribute = atts.getValue(EXCLUSIVE_ATTRIBUTE);

    if ((attribute!=null) && (attribute.length()>0))
    {
      boolean value = false;

      try
      {
        value = Boolean.valueOf(attribute).booleanValue();
        return value;
      }
      catch (Exception e)
      {
        return false;
      }
    }

    return false;
  }

  /**
   * Receive notification of the beginning of an element.
   *
   * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
   *        or if Namespace processing is not being performed.
   * @param localName The local name (without prefix), or the empty string if Namespace processing
   *        is not being performed.
   * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
   *        available.
   * @param atts The attributes attached to the element. If there are no attributes, it shall be an
   *        empty Attributes object.
   */
  public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException
  {
    if (namespaceURI.equals(NS))
    {
      if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_OUTER))
      {
        Lexicon lexicon = new Lexicon();
        lexicon.setLocation(getLocation());
        stack.push(lexicon);

        state = STATE_LEXICON;
      }
      else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXICON))
      {
        Lexeme lexeme = new Lexeme();
        lexeme.setLocation(getLocation());
        if (atts.getValue(SYMBOL_ATTRIBUTE)!=null)
          lexeme.setSymbol(new Terminal(atts.getValue(SYMBOL_ATTRIBUTE)));

        stack.push(lexeme);

        state = STATE_LEXEME;
      }
      else if ((localName.equals(ALTERNATION_ELEMENT)) && (state==STATE_LEXEME))
      {
        Alternation alternation = new Alternation();
        alternation.setLocation(getLocation());

        alternation.setMinOccurs(getMinOccursFromAttributes(atts));
        alternation.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(alternation);
      }
      else if ((localName.equals(CONCATENATION_ELEMENT)) && (state==STATE_LEXEME))
      {
        Concatenation concatenation = new Concatenation();
        concatenation.setLocation(getLocation());

        concatenation.setMinOccurs(getMinOccursFromAttributes(atts));
        concatenation.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(concatenation);
      }
      else if ((localName.equals(CHARACTERSTRING_ELEMENT)) && (state==STATE_LEXEME))
      {
        CharacterString characterstring = new CharacterString();
        characterstring.setLocation(getLocation());

        characterstring.setMinOccurs(getMinOccursFromAttributes(atts));
        characterstring.setMaxOccurs(getMaxOccursFromAttributes(atts));

        if (atts.getValue(CODE_ATTRIBUTE)!=null)
        {
          char character = (char)Integer.parseInt(atts.getValue(CODE_ATTRIBUTE));
          characterstring.setString(String.valueOf(character));
        }
        else
          characterstring.setString(atts.getValue(CHARACTERSTRING_SEQUENCE_ATTRIBUTE));

        stack.push(characterstring);
      }
      else if ((localName.equals(GROUP_ELEMENT)) && (state==STATE_LEXEME))
      {
        PatternGroup group = new PatternGroup();
        group.setLocation(getLocation());

        group.setMinOccurs(getMinOccursFromAttributes(atts));
        group.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(group);
      }
      else if ((localName.equals(UNIVERSALCHARACTER_ELEMENT)) && (state==STATE_LEXEME))
      {
        UniversalCharacter uni = new UniversalCharacter();
        uni.setLocation(getLocation());

        uni.setMinOccurs(getMinOccursFromAttributes(atts));
        uni.setMaxOccurs(getMaxOccursFromAttributes(atts));

        stack.push(uni);
      }
      else if ((localName.equals(BEGINOFLINE_ELEMENT)) && (state==STATE_LEXEME))
      {
        BeginOfLine bol = new BeginOfLine();
        bol.setLocation(getLocation());

        stack.push(bol);
      }
      else if ((localName.equals(ENDOFLINE_ELEMENT)) && (state==STATE_LEXEME))
      {
        EndOfLine eol = new EndOfLine();

        stack.push(eol);
      }
      else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_LEXEME))
      {
        CharacterClass characterclass = new CharacterClass();
        characterclass.setLocation(getLocation());

        characterclass.setExclusive(getExclusiveFromAttributes(atts));
        characterclass.setMinOccurs(getMinOccursFromAttributes(atts));
        characterclass.setMaxOccurs(getMaxOccursFromAttributes(atts));
        stack.push(characterclass);

        state = STATE_CHARACTERCLASS;
      }
      else if ((localName.equals(CHARACTERSET_ELEMENT)) && (state==STATE_CHARACTERCLASS))
      {
        CharacterSet characterset = new CharacterSet();
        characterset.setLocation(getLocation());

        if (atts.getValue(CODE_ATTRIBUTE)!=null)
        {
          char character = (char)Integer.decode(atts.getValue(CODE_ATTRIBUTE)).intValue();
          characterset.setCharacters(String.valueOf(character));
        }
        else
          characterset.setCharacters(atts.getValue(CHARACTERSET_CHARACTERS_ATTRIBUTE));

        stack.push(characterset);

        state = STATE_CHARACTERCLASSELEMENT;
      }
      else if ((localName.equals(CHARACTERINTERVAL_ELEMENT)) && (state==STATE_CHARACTERCLASS))
      {
        CharacterInterval characterinterval = new CharacterInterval();
        characterinterval.setLocation(getLocation());

        characterinterval.setMinimum(atts.getValue(CHARACTERINTERVAL_MIN_ATTRIBUTE).charAt(0));
        characterinterval.setMaximum(atts.getValue(CHARACTERINTERVAL_MAX_ATTRIBUTE).charAt(0));
        stack.push(characterinterval);

        state = STATE_CHARACTERCLASSELEMENT;
      }
      else
        throw new SAXException("Unexpected element "+qName+" at "+getLocation());
    }
    else
      throw new SAXException("Unexpected element "+qName+" at "+getLocation());
  }

  /**
   * Receive notification of the end of an element.
   *
   * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
   *        or if Namespace processing is not being performed.
   * @param localName The local name (without prefix), or the empty string if Namespace processing
   *        is not being performed.
   * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
   *        available.
   *
   * @throws SAXException
   */
  public void endElement(String namespaceURI, String localName, String qName)
    throws SAXException
  {
    if (namespaceURI.equals(NS))
    {
      if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_LEXICON))
      {
        lexicon = (Lexicon)stack.pop();
        state = STATE_OUTER;
      }
      else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXEME))
      {
        Lexeme lexeme = (Lexeme)stack.pop();
        Lexicon lexicon = (Lexicon)stack.peek();

        lexicon.addLexeme(lexeme);
        state = STATE_LEXICON;
      }
      else if (((localName.equals(ALTERNATION_ELEMENT)) ||
               (localName.equals(CONCATENATION_ELEMENT)) ||
               (localName.equals(CHARACTERSTRING_ELEMENT)) || (localName.equals(GROUP_ELEMENT)) ||
               (localName.equals(UNIVERSALCHARACTER_ELEMENT)) ||
               (localName.equals(BEGINOFLINE_ELEMENT)) || (localName.equals(ENDOFLINE_ELEMENT))) &&
               (state==STATE_LEXEME))
      {
        Pattern patternelement = (Pattern)stack.pop();

        if (stack.peek() instanceof Alternation)
        {
          Alternation alternation = (Alternation)stack.peek();

          alternation.addPattern(patternelement);
        }
        else if (stack.peek() instanceof Concatenation)
        {
          Concatenation concatenation = (Concatenation)stack.peek();

          concatenation.addPattern(patternelement);
        }
        else if (stack.peek() instanceof PatternGroup)
        {
          PatternGroup group = (PatternGroup)stack.peek();

          group.addPattern(patternelement);
        }
        else if (stack.peek() instanceof Lexeme)
        {
          Lexeme lexeme = (Lexeme)stack.peek();

          lexeme.setDefinition(patternelement);
        }
      }
      else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_CHARACTERCLASS))
      {
        Pattern patternelement = (Pattern)stack.pop();

        if (stack.peek() instanceof Alternation)
        {
          Alternation alternation = (Alternation)stack.peek();

          alternation.addPattern(patternelement);
        }
        else if (stack.peek() instanceof Concatenation)
        {
          Concatenation concatenation = (Concatenation)stack.peek();

          concatenation.addPattern(patternelement);
        }
        else if (stack.peek() instanceof PatternGroup)
        {
          PatternGroup group = (PatternGroup)stack.peek();

          group.addPattern(patternelement);
        }
        else if (stack.peek() instanceof Lexeme)
        {
          Lexeme lexeme = (Lexeme)stack.peek();

          lexeme.setDefinition(patternelement);
        }

        state = STATE_LEXEME;
      }
      else if (((localName.equals(CHARACTERSET_ELEMENT)) ||
               (localName.equals(CHARACTERINTERVAL_ELEMENT))) &&
               (state==STATE_CHARACTERCLASSELEMENT))
      {
        CharacterClassElement characterclasselement = (CharacterClassElement)stack.pop();
        CharacterClass characterclass = (CharacterClass)stack.peek();

        characterclass.addCharacterClassElement(characterclasselement);

        state = STATE_CHARACTERCLASS;
      }
      else
        throw new SAXException("Unexpected element "+qName+" at "+getLocation());
    }
    else
      throw new SAXException("Unexpected element "+qName+" at "+getLocation());
  }
}
TOP

Related Classes of net.sourceforge.chaperon.model.lexicon.LexiconFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.