Package riotcmd

Source Code of riotcmd.CmdLangParse$LangHandler

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package riotcmd;

import java.io.ByteArrayInputStream ;
import java.io.IOException ;
import java.io.InputStream ;
import java.util.HashMap ;
import java.util.Map ;
import java.util.Properties ;

import org.apache.log4j.PropertyConfigurator ;
import org.openjena.atlas.io.IO ;
import org.openjena.atlas.lib.Sink ;
import org.openjena.atlas.lib.SinkCounting ;
import org.openjena.atlas.lib.SinkNull ;
import org.openjena.atlas.lib.StrUtils ;
import org.openjena.riot.ErrorHandler ;
import org.openjena.riot.ErrorHandlerFactory ;
import org.openjena.riot.Lang ;
import org.openjena.riot.RiotException ;
import org.openjena.riot.RiotReader ;
import org.openjena.riot.SysRIOT ;
import org.openjena.riot.lang.LabelToNode ;
import org.openjena.riot.lang.LangRDFXML ;
import org.openjena.riot.lang.LangRIOT ;
import org.openjena.riot.lang.LangTurtleBase ;
import org.openjena.riot.out.NodeToLabel ;
import org.openjena.riot.out.SinkQuadOutput ;
import org.openjena.riot.out.SinkTripleOutput ;
import org.openjena.riot.pipeline.inf.InfFactory ;
import org.openjena.riot.pipeline.inf.InferenceSetupRDFS ;
import org.openjena.riot.system.RiotLib ;
import org.openjena.riot.system.SyntaxLabels ;
import org.openjena.riot.tokens.Tokenizer ;
import org.openjena.riot.tokens.TokenizerFactory ;
import arq.cmd.CmdException ;
import arq.cmdline.CmdGeneral ;
import arq.cmdline.ModLangParse ;
import arq.cmdline.ModTime ;

import com.hp.hpl.jena.Jena ;
import com.hp.hpl.jena.graph.Triple ;
import com.hp.hpl.jena.query.ARQ ;
import com.hp.hpl.jena.sparql.core.Quad ;

/** Common framework for running RIOT parsers */
public abstract class CmdLangParse extends CmdGeneral
{
    // We are not a TDB command but still set the logging.
    //static { CmdTDB.setLogging() ; }
    // Module.
    protected ModTime modTime                   = new ModTime() ;
    protected ModLangParse modLangParse         = new ModLangParse() ;
    protected InferenceSetupRDFS setup          = null ;
   
    interface LangHandler {
        String getItemsName() ;
        String getRateName() ;
    }

    static LangHandler langHandlerQuads = new LangHandler()
    {
        @Override
        public String getItemsName()        { return "quads" ; }
        @Override
        public String getRateName()         { return "QPS" ; }
    } ;
    static LangHandler langHandlerTriples = new LangHandler()
    {
        @Override
        public String getItemsName()        { return "triples" ; }
        @Override
        public String getRateName()         { return "TPS" ; }
    } ;
    static LangHandler langHandlerAny = new LangHandler()
    {
        @Override
        public String getItemsName()        { return "tuples" ; }
        @Override
        public String getRateName()         { return "TPS" ; }
    } ;
   
    protected static Map<Lang, LangHandler> dispatch = new HashMap<Lang, LangHandler>() ;
    static {
        for ( Lang lang : Lang.values())
        {
            if ( lang.isQuads() )
                dispatch.put(lang, langHandlerQuads) ;
            else
                dispatch.put(lang, langHandlerTriples) ;
        }
    }
   
    protected LangHandler langHandlerOverall = null ;

    // This is the setup for command for their message via the logging in ErrorHandlers
    private static final String log4Jsetup = StrUtils.strjoin("\n"
//                    , "## Plain output to stdout"
//                    , "log4j.appender.riot.plain=org.apache.log4j.ConsoleAppender"
//                    , "log4j.appender.riot.plain.target=System.out"
//                    , "log4j.appender.riot.plain.layout=org.apache.log4j.PatternLayout"
//                    , "log4j.appender.riot.plain.layout.ConversionPattern=%m%n"
                    , "## Plain output to stderr"
                    , "log4j.appender.riot.plainerr=org.apache.log4j.ConsoleAppender"
                    , "log4j.appender.riot.plainerr.target=System.err"
                    , "log4j.appender.riot.plainerr.layout=org.apache.log4j.PatternLayout"
                    , "log4j.appender.riot.plainerr.layout.ConversionPattern=%-5p %m%n"
                    , "## Everything"
                    , "log4j.rootLogger=INFO, riot.plainerr"
                    , "## Parser output"
                    , "log4j.additivity."+SysRIOT.riotLoggerName+"=false"
                    , "log4j.logger."+SysRIOT.riotLoggerName+"=ALL, riot.plainerr "
     ) ;

    /** Reset the logging to be good for command line tools */
    public static void setLogging()
    {
        // Turn off optimizer warning.
        // Use a plain logger for output.
        Properties p = new Properties() ;
        InputStream in = new ByteArrayInputStream(StrUtils.asUTF8bytes(log4Jsetup)) ;
        try { p.load(in) ; } catch (IOException ex) {}
        PropertyConfigurator.configure(p) ;
        //LogManager.getLogger(SysRIOT.riotLoggerName).setLevel(Level.ALL) ;
        System.setProperty("log4j.configuration", "set") ;
    }
   
   
    protected CmdLangParse(String[] argv)
    {
        super(argv) ;
        // As a command, we take control of logging ourselves.
        setLogging() ;
        super.addModule(modTime) ;
        super.addModule(modLangParse) ;
       
        super.modVersion.addClass(Jena.class) ;
        super.modVersion.addClass(ARQ.class) ;
        //super.modVersion.addClass(SysRIOT.class) ;
    }

    @Override
    protected String getSummary()
    {
        //return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] [--skip | --stopOnError] file ..." ;
        return getCommandName()+" [--time] [--check|--noCheck] [--sink] [--base=IRI] file ..." ;
    }

    @Override
    protected void processModulesAndArgs()
    { }

    protected long totalMillis = 0 ;
    protected long totalTuples = 0 ;
   

    @Override
    protected void exec()
    {
        if ( modLangParse.strictMode() )
        {
            SysRIOT.StrictXSDLexicialForms = true ;
            LangTurtleBase.strict = true ;
        }
       
        if ( modLangParse.getRDFSVocab() != null )
            setup = new InferenceSetupRDFS(modLangParse.getRDFSVocab()) ;
       
        try {
            if ( super.getPositional().isEmpty() )
                parseFile("-") ;
            else
            {
                for ( String fn : super.getPositional() )
                    parseFile(fn) ;
            }
        } finally {
            System.err.flush() ;
            System.out.flush() ;
            if ( super.getPositional().size() > 1 && modTime.timingEnabled() )
                output("Total", totalTuples, totalMillis, langHandlerOverall) ;
        }
    }

    public void parseFile(String filename)
    {
        InputStream in = null ;
        if ( filename.equals("-") )
            parseFile("http://base/", "stdin", System.in) ;
        else
        {
            try {
                in = IO.openFile(filename) ;
            } catch (Exception ex)
            {
                System.err.println("Can't open '"+filename+"' "+ex.getMessage()) ;
                return ;
            }
            parseFile(null, filename, in) ;
            IO.close(in) ;
        }
    }

    public void parseFile(String defaultBaseURI, String filename, InputStream in)
    {  
        String baseURI = modLangParse.getBaseIRI() ;
        if ( baseURI == null )
            baseURI = defaultBaseURI ;
        parseRIOT(baseURI, filename, in) ;
    }
   
    protected abstract Lang selectLang(String filename, Lang lang) ;

    protected void parseRIOT(String baseURI, String filename, InputStream in)
    {
        baseURI = RiotReader.chooseBaseIRI(baseURI, filename) ;
       
        boolean checking = true ;
        if ( modLangParse.explicitChecking() )  checking = true ;
        if ( modLangParse.explicitNoChecking() ) checking = false ;
       
        ErrorHandler errHandler = null ;
        if ( checking )
        {
            if ( modLangParse.stopOnBadTerm() )
                errHandler = ErrorHandlerFactory.errorHandlerStd  ;
            else
                // Try to go on if possible.  This is the default behaviour.
                errHandler = ErrorHandlerFactory.errorHandlerWarn ;
        }
       
        if ( modLangParse.skipOnBadTerm() )
        {
            // TODO skipOnBadterm
        }
       
        Lang lang = selectLang(filename, Lang.NQUADS)
        LangHandler handler = dispatch.get(lang) ;
        if ( handler == null )
            throw new CmdException("Undefined language: "+lang) ;
       
        // If multiple files, choose the overall labels.
        if ( langHandlerOverall == null )
            langHandlerOverall = handler ;
        else
        {
            if ( langHandlerOverall != langHandlerAny )
            {
                if ( langHandlerOverall != handler )
                    langHandlerOverall = langHandlerAny ;
            }
        }
       
        // Make a flag.
        // Input and output subflags.
        // If input is "label, then output using NodeToLabel.createBNodeByLabelRaw() ;
        // else use NodeToLabel.createBNodeByLabel() ;
        // Also, as URI.
        final boolean labelsAsGiven = false ;
       
        SinkCounting<?> sink ;
        LangRIOT parser ;
       
        NodeToLabel labels = SyntaxLabels.createNodeToLabel() ;
        if ( labelsAsGiven )
            labels = NodeToLabel.createBNodeByLabelEncoded() ;
       
        // Uglyness because quads and triples aren't subtype of some Tuple<Node>
        // That would change a lot (Triples came several years before Quads).
        if ( lang.isTriples() )
        {
            Sink<Triple> s = SinkNull.create() ;
            if ( ! modLangParse.toBitBucket() )
                s = new SinkTripleOutput(System.out, null, labels) ;
            if ( setup != null )
                s = InfFactory.infTriples(s, setup) ;
           
            SinkCounting<Triple> sink2 = new SinkCounting<Triple>(s) ;
           
            if ( lang.equals(Lang.RDFXML) )
                // Adapter round ARP RDF/XML reader.
                parser = LangRDFXML.create(in, baseURI, filename, errHandler, sink2) ;
            else
                parser = RiotReader.createParserTriples(in, lang, baseURI, sink2) ;
           
            sink = sink2 ;
        }
        else
        {
            Sink <Quad> s = SinkNull.create() ;
            if ( ! modLangParse.toBitBucket() )
                s = new SinkQuadOutput(System.out, null, labels) ;
            if ( setup != null )
                s = InfFactory.infQuads(s, setup) ;
           
            SinkCounting<Quad> sink2 = new SinkCounting<Quad>(s) ;
            parser = RiotReader.createParserQuads(in, lang, baseURI, sink2) ;
            sink = sink2 ;
        }
       
        // Pending log message having he filename in them.
        // output(filename) ;
        try
        {
            // Default behaviour is "check":
           
            if ( checking )
            {
                if ( parser.getLang() == Lang.NTRIPLES ||  parser.getLang() == Lang.NQUADS )
                    parser.setProfile(RiotLib.profile(baseURI, false, true, errHandler)) ;
                else
                    parser.setProfile(RiotLib.profile(baseURI, true, true, errHandler)) ;
            }
            else
                parser.setProfile(RiotLib.profile(baseURI, false, false, errHandler)) ;
           
            if ( labelsAsGiven )
                parser.getProfile().setLabelToNode(LabelToNode.createUseLabelAsGiven()) ;
           
            modTime.startTimer() ;
            parser.parse() ;
        }
        catch (RiotException ex)
        {
            // Should have handled the exception and logged a message by now.
            //System.err.println("++++"+ex.getMessage());
          
            if ( modLangParse.stopOnBadTerm() )
                return ;
        }
        finally {
            IO.close(in) ;
            sink.close() ;
        }
        long x = modTime.endTimer() ;
        long n = sink.getCount() ;
       

        if ( modTime.timingEnabled() )
            output(filename, n, x, handler) ;
       
        totalMillis += x ;
        totalTuples += n ;
    }
   
    protected Tokenizer makeTokenizer(InputStream in)
    {
        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(in) ;
        return tokenizer ;
    }
   
    protected void output(String label, long numberTriples, long timeMillis, LangHandler handler)
    {
        double timeSec = timeMillis/1000.0 ;
       
        System.out.flush() ;
        System.err.printf("%s : %,5.2f sec  %,d %s  %,.2f %s\n",
                          label,
                          timeMillis/1000.0, numberTriples,
                          handler.getItemsName(),
                          timeSec == 0 ? 0.0 : numberTriples/timeSec,
                          handler.getRateName()) ;
    }
   
    protected void output(String label)
    {
        System.err.printf("%s : \n", label) ;
    }
}
TOP

Related Classes of riotcmd.CmdLangParse$LangHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.