Package org.openjena.atlas.io

Source Code of org.openjena.atlas.io.PeekReader

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.openjena.atlas.io;

import static org.openjena.atlas.io.IO.EOF ;
import static org.openjena.atlas.io.IO.UNSET ;

import java.io.FileInputStream ;
import java.io.FileNotFoundException ;
import java.io.IOException ;
import java.io.InputStream ;
import java.io.Reader ;

import org.openjena.atlas.AtlasException ;
import org.openjena.atlas.lib.Chars ;

import com.hp.hpl.jena.shared.JenaException ;

/** Parsing-centric reader.
*  This class is not thread safe.
* @see BufferingWriter
* @see PeekInputStream
*/

public final class PeekReader extends Reader
{
    // Remember to apply fixes to PeekInputStream as well.
   
    // Buffering is done by a CharStream - does i t make adifference?
    // Yes.  A lot (Java6).
   
    // Using a Reader here seems to have zero cost or benefit but CharStream allows fast String handling.
    private final CharStream source ;
   
    private static final int PUSHBACK_SIZE = 10 ;
    static final byte CHAR0 = (char)0 ;
   
    private char[] pushbackChars ;
    private int idxPushback ;                   // Index into pushbackChars: points to next pushBack. -1 => none.
   
    private int currChar = UNSET ;              // Next character to return when reading forwards.
    private long posn ;
   
    public static final int INIT_LINE = 1 ;
    public static final int INIT_COL = 1 ;
   
    private long colNum ;
    private long lineNum ;
   
    // ---- static construction methods.
   
    public static PeekReader make(Reader r)
    {
        if ( r instanceof PeekReader )
            return (PeekReader)r ;
        return make(r, CharStreamBuffered.CB_SIZE) ;
    }
   
    public static PeekReader make(Reader r, int bufferSize)
    {
//        if ( r instanceof BufferedReader )
//        {
//            // Already buffered - and we can't unbuffer it.
//            // Still worth our buffering because of the synchronized on one char reads
//            return new PeekReader(new CharStreamBuffered(r, bufferSize)) ;
//        }
        return new PeekReader(new CharStreamBuffered(r, bufferSize)) ;
        // Particularly slow to start with.
        //return new PeekReader(new CharStreamBasic(new BufferedReader(r, bufferSize))) ;
    }

    /** Make PeekReader where the input is UTF8 */
    public static PeekReader makeUTF8(InputStream in)
    {
        // This is the best route to make a PeekReader because it avoids
        // chances of wrong charset for a Reader say.
        PeekReader pr ;
        if ( true )
        {
            Reader r = IO.asUTF8(in) ;
            // This adds reader-level buffering
            pr = make(r) ;
        }
        else
        {
            // This is a bit slower - reason unknown.
            InputStreamBuffered in2 = new InputStreamBuffered(in) ;
            CharStream r = new InStreamUTF8(in2) ;
            pr = new PeekReader(r) ;
        }
        // Skip BOM.
        int ch = pr.peekChar() ;
        if ( ch == Chars.BOM )
            // Skip BOM
            pr.readChar() ;
        return pr ;
    }
   
    /** Make PeekReader where the input is ASCII */
    public static PeekReader makeASCII(InputStream in)
    {
//      InputStreamBuffered in2 = new InputStreamBuffered(in) ;
//      CharStream r = new StreamASCII(in2) ;
        Reader r = IO.asASCII(in) ;
        return make(r) ;
    }
   
    public static PeekReader make(CharStream r)
    {
        return new PeekReader(r) ;
    }
   
    public static PeekReader readString(String string)
    {
        return new PeekReader(new CharStreamSequence(string)) ;
    }
   
    public static PeekReader open(String filename)
    {
        try {
            InputStream in = new FileInputStream(filename) ;
            return makeUTF8(in) ;
        } catch (FileNotFoundException ex){ throw new AtlasException("File not found: "+filename) ; }
    }
   
    private PeekReader(CharStream stream)
    {
        this.source = stream ;
        this.pushbackChars = new char[PUSHBACK_SIZE] ;
        this.idxPushback = -1 ;
       
        this.colNum = INIT_COL ;
        this.lineNum = INIT_LINE ;
        this.posn = 0 ;
       
        // We start at character "-1", i.e. just before the file starts.
        // Advance always so that the peek character is valid (is character 0)
        // Returns the character before the file starts (i.e. UNSET).
    }
   
    public long getLineNum()            { return lineNum; }

    public long getColNum()             { return colNum; }

    public long getPosition()           { return posn; }

    //---- Do not access currChar except with peekChar/setCurrChar.
    public final int peekChar()
    {
        if ( idxPushback >= 0 )
            return pushbackChars[idxPushback] ;
       
        // If not started ... delayed initialization.
        if ( currChar == UNSET )
            init() ;
        return currChar ;
    }
   
    // And the correct way to read the currChar is to call peekChar.
    private final void setCurrChar(int ch)
    {
        currChar = ch ;
    }
   
    public final int readChar()               { return nextChar() ; }
   
    /** push back a character : does not alter underlying position, line or column counts*/ 
    public final void pushbackChar(int ch)    { unreadChar(ch) ; }
   
    // Reader operations
    @Override
    public final void close() throws IOException
    {
        source.closeStream() ;
    }

    @Override
    public final int read() throws IOException
    {
        if ( eof() )
            return EOF ;
        int x = readChar() ;
        return x ;
    }
   
    @Override
    public final int read(char[] cbuf, int off, int len) throws IOException
    {
        if ( eof() )
            return EOF ;
        // Note - need to preserve line count, so single char ops are reasonably efficient.
        for ( int i = 0 ; i < len ; i++ )
        {
            int ch = readChar() ;
            if ( ch == EOF )
                return (i==0)? EOF : i ;
            cbuf[i+off] = (char)ch ;
        }
        return len ;
    }

    public final boolean eof()   { return peekChar() == EOF ; }

    // ----------------
    // The methods below are the only ones to manipulate the character buffers.
    // Other methods may read the state of variables.
   
    private final void unreadChar(int ch)
    {
        // The push back buffer is in the order where [0] is the oldest.
        // Does not alter the line number, column number or position count.
       
        if ( idxPushback >= pushbackChars.length )
        {
            // Enlarge pushback buffer.
            char[] pushbackChars2 = new char[pushbackChars.length*2] ;
            System.arraycopy(pushbackChars, 0, pushbackChars2, 0, pushbackChars.length) ;
            pushbackChars = pushbackChars2 ;
            //throw new JenaException("Pushback buffer overflow") ;
        }
        if ( ch == EOF || ch == UNSET )
            throw new JenaException("Illegal character to push back: "+ch) ;
       
        idxPushback++ ;
        pushbackChars[idxPushback] = (char)ch ;
    }
   
    private final void init()
    {
        advanceAndSet() ;
        if ( currChar == UNSET )
            setCurrChar(EOF) ;
    }

    private final void advanceAndSet()
    {
        int ch = source.advance() ;
        setCurrChar(ch) ;
    }
   
   
    // Invariants.
    // currChar is either chars[idx-1] or pushbackChars[idxPushback]
   
    /** Return the next character, moving on one place and resetting the peek character */
    private final int nextChar()
    {
        int ch = peekChar() ;
       
        if ( ch == EOF )
            return EOF ;
       
        if ( idxPushback >= 0 )
        {
            char ch2 = pushbackChars[idxPushback] ;
            idxPushback-- ;
            return ch2 ;
        }

        posn++ ;
       
        if (ch == '\n')
        {
            lineNum++;
            colNum = INIT_COL ;
        }
        else
            colNum++;
       
        advanceAndSet() ;
        return ch ;
    }
}
TOP

Related Classes of org.openjena.atlas.io.PeekReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.