Package com.hp.hpl.jena.iri.impl

Source Code of com.hp.hpl.jena.iri.impl.Parser

/*
* (c) Copyright 2005 Hewlett-Packard Development Company, LP
* [See end of file]
*/

package com.hp.hpl.jena.iri.impl;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hp.hpl.jena.iri.IRI;
import com.hp.hpl.jena.iri.IRIComponents;
import com.hp.hpl.jena.iri.IRIFactory;
import com.hp.hpl.jena.iri.Violation;
import com.hp.hpl.jena.iri.ViolationCodes;
import com.ibm.icu.text.IDNA;
import com.ibm.icu.text.StringPrepParseException;

public class Parser implements IRIComponents, ViolationCodes {

    static Lexer lexers[][] = new Lexer[8][];
    static {
        for (int i = 0; i < lexers.length; i++)
            lexers[i] = new Lexer[] {
                    new LexerScheme((Reader) null),
                    new LexerUserinfo((Reader) null),
                    new LexerHost((Reader) null),
                    new LexerPort((Reader) null),
                    new LexerPath((Reader) null),
                    new LexerQuery((Reader) null),
                    new LexerFragment((Reader) null),
                    new LexerXHost((Reader) null),
                    };
    }

    static int nextLexer = 0;

    static boolean DEBUG = false;

    static final int fields[] = { SCHEME, USER, HOST, PORT, PATH, QUERY,
            FRAGMENT, AUTHORITY, PATHQUERY };

    static final int invFields[] = new int[32];
    static {
        for (int i = 0; i < invFields.length; i++)
            invFields[i] = -1;
        for (int i = 0; i < fields.length; i++)
            invFields[fields[i]] = i;
    }

    static final Pattern p = Pattern.compile("(([^:/?#]*):)?" + // scheme
            "(//((([^/?#@]*)@)?" + // user
            "(\\[[^/?#]*\\]|([^/?#:]*))?" + // host
            "(:([^/?#]*))?))?" + // port
            "([^#?]*)?" + // path
            "(\\?([^#]*))?" + // query
            "(#(.*))?", // frag
            Pattern.DOTALL);

    final Matcher m;

    final String uri;
   
    int found;

    final long errors[] = new long[fields.length];

    // public long allErrors = 0l;
    IRIImpl iri;
   
   
    public Parser(String uri, IRIImpl iri) {
        this.uri = uri;
        this.iri = iri;
        m = p.matcher(uri);
        if (!m.matches()) {
            throw new RuntimeException("not meant to happen");
        }
        Lexer[] lex = nextLexer();

        IRIFactoryImpl factory = iri.getFactory();
        if (!has(SCHEME)) {
//            recordError(SCHEME, RELATIVE_URI);
            iri.scheme = factory.noScheme();
        } else {
            iri.scheme = factory.getScheme(get(SCHEME).toLowerCase(),this);
        }
        for (int i = 0; i < lex.length-1; i++) {
            int range = fields[i];
            if (has(range)) {
                if ((range!=PATH && range != HOST)|| start(range)!=end(range) ) {
                    found |= 1<<range;  
                }
                lex[i].analyse(this, range);
                iri.scheme.analyse(this, range);
                if (range==PORT) {
                    try {
                      int port = Integer.parseInt(get(PORT));
                      if (iri.scheme.port()== port )
                          recordError(PORT,DEFAULT_PORT_SHOULD_BE_OMITTED);
                      if (port<1024 && port>=0)
                          recordError(PORT,PORT_SHOULD_NOT_BE_WELL_KNOWN);
                    }
                    catch (Exception e) {
                        // ignore
                    }
                }
            }
        }
        if (has(AUTHORITY))
            found |= 1<<AUTHORITY;
        iri.scheme.analyse(this,PATHQUERY);
        if ((errors[invFields[HOST]] & ((1l << DOUBLE_DASH_IN_REG_NAME)
                | (1l << ACE_PREFIX) | (1l << NON_URI_CHARACTER))) != 0) {

            String h = get(HOST);
            try {
                try {

                    IDNA.convertIDNToUnicode(IDNA.convertIDNToASCII(h,
                            IDNA.USE_STD3_RULES), IDNA.USE_STD3_RULES
                            | IDNA.ALLOW_UNASSIGNED);

                } catch (StringPrepParseException e) {
                    try {
                        IDNA.convertIDNToUnicode(IDNA.convertIDNToASCII(h,
                                IDNA.USE_STD3_RULES | IDNA.ALLOW_UNASSIGNED),
                                IDNA.USE_STD3_RULES | IDNA.ALLOW_UNASSIGNED);
                        recordError(HOST, BAD_IDN_UNASSIGNED_CHARS);
                    } catch (StringPrepParseException e1) {
                        recordError(HOST, BAD_IDN);
                    }

                }
            } catch (IndexOutOfBoundsException e) {
                recordError(HOST, BAD_IDN);
            }
        }
       
        hasComponents(~found & iri.scheme.getRequired(),
                REQUIRED_COMPONENT_MISSING);
        hasComponents(found & iri.scheme.getProhibited(),
          PROHIBITED_COMPONENT_PRESENT);

    }

    static private Lexer[] nextLexer() {
        Lexer lex[] = lexers[nextLexer];
        nextLexer = (nextLexer + 1) % lexers.length;
        return lex;
    }
   
    static LexerHost hostLexer() {
        return (LexerHost)nextLexer()[2];
    }
   
    private void hasComponents(int errorComponents,int eCode) {
        if (errorComponents==0)
            return;
        int i=0;
        while (errorComponents!=0) {
            if ((errorComponents&1)==1)
                recordError(i,eCode);
            i++;
            errorComponents >>= 1;
        }
    }

    public boolean has(int f) {
        return m.start(f) != -1;
    }

    public int start(int f) {
        return m.start(f==PATHQUERY?PATH:f);
    }

    public int end(int f) {
        if (f!=PATHQUERY)return m.end(f);
        return has(QUERY)?end(QUERY):end(PATH);
    }

    public String get(int f) {
        if (f!=PATHQUERY)return m.group(f);
        return uri.substring(start(PATH),end(PATHQUERY));
    }

    static private void show(IRI iri) {

        System.out.println("Scheme: " + iri.getScheme());

        System.out.println("Authority: " + iri.getRawAuthority());

        System.out.println("User: " + iri.getRawUserinfo());

        System.out.println("Host: " + iri.getRawHost());
        System.out.println("Port: " + iri.getPort());

        System.out.println("Path: " + iri.getRawPath());

        System.out.println("Query: " + iri.getRawQuery());

        System.out.println("Fragment: " + iri.getRawFragment());
        Iterator it = ((AbsIRIImpl) iri).allViolations();
        while (it.hasNext()) {
            System.out.println(((Violation) it.next()).getLongMessage());
        }
        it = ((AbsIRIImpl) iri).violations(true);
        while (it.hasNext()) {
            System.out.println("+"+((Violation) it.next()).getLongMessage());
        }
        it = ((AbsIRIImpl) iri).violations(false);
        while (it.hasNext()) {
            System.out.println("++"+((Violation) it.next()).getLongMessage());
        }
    }

    static public void main(String args[]) throws IOException {
        LineNumberReader in = new LineNumberReader(new InputStreamReader(
                System.in));
        IRIImpl last = null;
        DEBUG = true;

        IRIFactory factory = IRIFactory.iriImplementation();
        while (true) {
            String s = in.readLine().trim();
            if (s.equals("quit"))
                return;
            IRIImpl iri = (IRIImpl) factory.create(s);
            show(iri);

            if (last != null) {
                IRI r = last.create(iri);
                System.out.println("Resolved: " + r.toString());
                show(r);
            }
            last = iri;
        }
    }

    public void recordError(int range, int e) {
        errors[invFields[range]] |= (1l << e);
        iri.allErrors |= (1l << e);
    }
   
    long errors(int r) {
        return errors[invFields[r]];
    }

    public void matchedRule(int range, int rule) {
        if (DEBUG)
            System.err.println("Rule " + rule + " in range " + range);

    }

    public void matchedRule(int range, int rule, String string) {
        if (DEBUG)
            System.err.println("Rule " + rule + " in range " + range
                    + " yytext: '" + string + "'");

    }

}

/*
* (c) Copyright 2005 Hewlett-Packard Development Company, LP All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer. 2. Redistributions in
* binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other
* materials provided with the distribution. 3. The name of the author may not
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

 
TOP

Related Classes of com.hp.hpl.jena.iri.impl.Parser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.