Package anvil.parser

Source Code of anvil.parser.Parser

/*
* $Id: Parser.java,v 1.9 2002/09/16 08:05:03 jkl Exp $
*
* Copyright (c) 2002 Njet Communications Ltd. All Rights Reserved.
*
* Use is subject to license terms, as defined in
* Anvil Software License, Version 1.1. See LICENSE
* file, or http://njet.org/license-1.1.txt
*/
package anvil.parser;

import anvil.Location;
import java.io.InputStream;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.net.URL;

/**
* class Parser
*
* @author: Jani Lehtim�ki
*/
public class Parser
  implements Locator
{

  private URL _url = null;
  private PushbackInputStream _inputStream = null;
  private StringBuffer _buffer = new StringBuffer();
 
  private DocumentHandler _documentHandler;
 
  private int _line = 1;
  private int _column = 1;
  private int _previousColumn = 0;

  private int _startLine = 1;
  private int _startColumn = 1;
 

  public Parser()
  {
  }
 

  protected int read() throws IOException
  {
    int ch;
    ch = _inputStream.read();
    _previousColumn = _column;
    if (ch == '\n') {
      _line++;
      _column = 1;
    } else {
      _column++;
    }
    return ch;
  }


  protected void unread(int ch) throws IOException
  {
    if (ch == '\n') {
      _line--;
      _column = _previousColumn;
    } else {
      _column--;
    }
    _inputStream.unread(ch);
  }

 
  protected void readCDataSection() throws IOException
  {
    StringBuffer buffer = _buffer;
    int dashcount = 0;
    int count = 0;
    int ch;
   
    buffer.setLength(0);

    // <![
    if ((ch = read()) != 'C') {
      unread(ch);
      readComment("[");
      return;
    }

    if ((ch = read()) != 'D') {
      unread(ch);
      readComment("[C");
      return;
    }

    if ((ch = read()) != 'A') {
      unread(ch);
      readComment("[CD");
      return;
    }

    if ((ch = read()) != 'T') {
      unread(ch);
      readComment("[CDA");
      return;
    }

    if ((ch = read()) != 'A') {
      unread(ch);
      readComment("[CDAT");
      return;
    }

    if ((ch = read()) != '[') {
      unread(ch);
      readComment("[CDATA");
      return;
    }
   
    int state = 0;

    out:
    while(true) {
      ch = read();
      switch(ch) {
      case -1:
        break out;

      case ']':
        if (state == 0) {
          state = 1;
          break;
        } else if (state == 1) {
          state = 2;
          break;
        }
       
      case '>':
        if (state == 2) {
          break out;
        }

      default:
        if (state != 0) {
          if (state > 0) {
            buffer.append(']');
            if (state > 1) {
              buffer.append(']');
            }
          }
        }
        state = 0;
        buffer.append((char)ch);
        break;
      }
    }
   
    if (_documentHandler != null) {
      _documentHandler.handleCharacters(buffer.toString());
   
  }
 
 
 
  protected void readComment(String start) throws IOException
  {
    StringBuffer buffer = _buffer;
    int dashcount = 0;
    int count = 0;
    int ch;
   
    buffer.setLength(0);

    ch = read();
    if (start == null) {
      while(dashcount < 2 && ch == '-') {
        buffer.append((char)ch);
        dashcount++;
        ch = read();
      }
    } else {
      buffer.append(start);
    }

    while(ch != -1) {
   
      if (ch == '-') {
        if (dashcount > 0) {
          count++;
        }
      } else if (ch == '>') {
        if (count >= dashcount) {
          break;
        }
      } else {
        count = 0;
      }
     
      buffer.append((char)ch);
     
      ch = read();
     
    }

    if (_documentHandler != null) {
      _documentHandler.handleComment(buffer.toString());
   
  }
 

  protected int skipSpaces() throws IOException
  {
    int ch;
    do {
      ch = read();
    } while(ch != -1 && Character.isWhitespace((char)ch));
    return (int)ch;
  }


  protected void readCharacters(int ch) throws IOException
  {
    StringBuffer buffer = _buffer;
    buffer.setLength(0);
    while(ch != -1 && ch != '<') {
      buffer.append((char)ch);
      ch = read();
    }
    if (ch == '<') {
      unread(ch);
    }
    if (_documentHandler != null) {
      _documentHandler.handleCharacters(buffer.toString());
   
  }


  public void readPI() throws IOException
  {
    StringBuffer buffer = _buffer;
    int ch;
    int state = 0;
   
    buffer.setLength(0);
   
    while((ch = read()) != -1) {
      if (state == 0 && ch == '?') {
        state = 1;
      } else if (state == 1 && ch == '>') {
        break;
      } else {
        if (state == 1) {
          buffer.append('?');
          state = 0;
        }
        buffer.append((char)ch);
      }
    }
   
    if (_documentHandler != null) {
      _documentHandler.handleProcessingInstruction(buffer.toString());
   
  }
 


  protected  Tag readTag(int ch) throws IOException
  {
    StringBuffer buffer = _buffer;
    boolean hasEndSlash = false;
    Tag tag = null;
    buffer.setLength(0);
   
    while(ch != -1 && ch != '>' && !Character.isWhitespace((char)ch)) {
      if (ch == '/') {
        ch = read();
        if (ch == '>') {
          hasEndSlash = true;
          break;
        } else {
          unread(ch);
          ch = '/';
        }
      }
      buffer.append((char)ch);
      ch = read();
    }

    if (ch == -1) {
      return null;
    }

    tag = new Tag(buffer.toString());
    if (hasEndSlash) {
      tag.enableEndSlash();
    }
   
    if (ch == '>') {
      return tag;
    }

    while(true) {

      String name;

      ch = skipSpaces();
     
      if (ch == '/') {
        ch = read();
        if (ch == '>') {
          tag.enableEndSlash();
        } else {
          unread(ch);
          ch = '/';
        }
      }
     
      if (ch == '>' || ch == -1) {
        return tag;
      }

      buffer.setLength(0);
      while(ch != -1 && ch != '>' && ch != '=' && !Character.isWhitespace((char)ch)) {
        if (ch == '/') {
          ch = read();
          if (ch == '>') {
            tag.enableEndSlash();
            break;
          } else {
            unread(ch);
            ch = '/';
          }
        }
        buffer.append((char)ch);
        ch = read();
      }
      name = buffer.toString();

      if (ch == -1 || ch == '>') {

        tag.add(name);
        return tag;

      } else if (Character.isWhitespace((char)ch)) {

        ch = skipSpaces();
        if (ch == -1 || ch == '>') {
          tag.add(name);
          return tag;
        }

      }

      if (ch == '=') {

        ch = skipSpaces();
        if (ch == -1 || ch == '>') {
          return tag;
        }

        if (ch == '"') {

          buffer.setLength(0);
          ch = read();
          while(ch != -1 && ch != '"') {
            buffer.append((char)ch);
            ch = read();
          }
          tag.add(name, buffer.toString());

        } else if (ch == '\'') {

          buffer.setLength(0);
          ch = read();
          while(ch != -1 && ch != '\'') {
            buffer.append((char)ch);
            ch = read();
          }
          tag.add(name, buffer.toString());

        } else if (ch == '`') {

          buffer.setLength(0);
          ch = read();
          while(ch != -1 && ch != '`') {
            buffer.append((char)ch);
            ch = read();
          }
          tag.add(name, buffer.toString());

        } else {

          buffer.setLength(0);
          do {
            if (ch == '/') {
              ch = read();
              if (ch == '>') {
                tag.enableEndSlash();
                break;
              } else {
                unread(ch);
                ch = '/';
              }
            }
            buffer.append((char)ch);
            ch = read();
          } while(ch != -1 && ch != '>' && !Character.isWhitespace((char)ch));
          tag.add(name, buffer.toString());
          if (ch == '>') {  
            return tag;
          }

        }

        if (ch == -1) {
          return tag;
        }

      } else {

        unread(ch);
        tag.add(name);

      }

    } 
  }
 

  protected void dispatchElements() throws IOException
  {
    int ch;

    if (_documentHandler != null) {
      _documentHandler.setDocumentLocator(this);
      _documentHandler.startDocument();
   
     
    while(true) {
      _startLine = _line;
      _startColumn = _column;
     
      ch = read();
      if (ch == -1) {
        break;
       
      } else if (ch == '<') {
     
        ch = read();
        if (ch == '!') {
          ch = read();
          if (ch == '[') {
            readCDataSection();
          } else {
            unread(ch);
            readComment(null);
          }
         
        } else if (ch == '?') {
          readPI();
         
        } else {
          Tag tag = readTag(ch);
          if (_documentHandler != null) {
            _documentHandler.handleTag(tag);
         
        }
       
      } else {
        readCharacters(ch);
      }
    }

    if (_documentHandler != null) {
      _documentHandler.endDocument();
   
  }


  public Location getLocation()
  {
    return new Location(_url, _startLine, _startColumn);
  }



  public void parse(DocumentHandler documentHandler, InputSource source) throws IOException
  {
    parse(documentHandler, source.getURL(), source.getInputStream());
  }



  public void parse(DocumentHandler documentHandler, URL url, InputStream input) throws IOException
  {
 
    _documentHandler = documentHandler;
    _url = url;
   
    try {
   
      _inputStream = new PushbackInputStream(input);
      dispatchElements();
      _inputStream.close();
     
    } catch (IOException exception) {
   
      if (_inputStream != null) {
        _inputStream.close();
      }
      exception.fillInStackTrace();
      throw exception;
     
    } finally {
   
      _inputStream = null;
     
    }
  }


  public static String getAttributeValue(String data, String attribute)
  {
    int length = data.length();
    int attrLength = attribute.length();
    int i = data.indexOf(" " + attribute + "=");
    if ((i > 0) && ((i + attrLength + 2) < length)) {
      StringBuffer buffer = new StringBuffer();
      char ch;
      i += attrLength + 2;
      char quote = data.charAt(i);
      if ((quote == '\'') || (quote == '"')) {
        i++;
        while((i < length) && ((ch = data.charAt(i++)) != quote)) {
          buffer.append(ch);
        }
      } else {
        while((i < length) && !Character.isWhitespace(ch = data.charAt(i++))) {
          buffer.append(ch);
        }
      }
      return buffer.toString();

    } else {
      return null;
    }
  }
 
}
TOP

Related Classes of anvil.parser.Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.