Package com.hp.hpl.jena.sparql.engine.optimizer

Source Code of com.hp.hpl.jena.sparql.engine.optimizer.StatsMatcher

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.hp.hpl.jena.sparql.engine.optimizer;

import static com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternElements.* ;

import java.io.OutputStream ;
import java.util.ArrayList ;
import java.util.HashMap ;
import java.util.List ;
import java.util.Map ;

import org.apache.jena.atlas.io.IndentedWriter ;
import org.apache.jena.atlas.logging.Log ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

import com.hp.hpl.jena.graph.Node ;
import com.hp.hpl.jena.graph.Triple ;
import com.hp.hpl.jena.sparql.ARQException ;
import com.hp.hpl.jena.sparql.core.Var ;
import com.hp.hpl.jena.sparql.engine.optimizer.reorder.PatternTriple ;
import com.hp.hpl.jena.sparql.graph.NodeConst ;
import com.hp.hpl.jena.sparql.sse.Item ;
import com.hp.hpl.jena.sparql.sse.ItemException ;
import com.hp.hpl.jena.sparql.sse.ItemList ;
import com.hp.hpl.jena.sparql.sse.SSE ;

/** Stats format:<pre>(stats
*    (meta ...)
*    ((S P O) weight)
*    (<predicate uri> weight)
*  )</pre>
* where <code>S</code>, <code>P</code>, <code>O</code> is a URI, variable,
* literal or one of the words <code>ANY</code> (matches anything),
* <code>VAR</code> (matches a variable), <code>TERM</code> (matches a
* fixed URI, or literal), <code>URI</code>, <code>BNODE</code>,
* <code>LITERAL</code> (matches one of these types).   
*/

public final class StatsMatcher
{
    private static Logger log = LoggerFactory.getLogger(StatsMatcher.class) ;
    public static final String STATS    = "stats" ;
    public static final String META     = "meta" ;
    public static final String COUNT    = "count" ;
    public static final Item OTHER      = Item.createSymbol("other") ;
    private static double NOMATCH       = -1 ;
   
    private static class Match
    {
        double weight = NOMATCH ;
        int exactMatches = 0 ;
        int termMatches = 0 ;
        int varMatches = 0 ;
        int anyMatches = 0 ;
    }

    // General structure
    List<Pattern> patterns = new ArrayList<Pattern>() ;
    // Map keyed by P for faster lookup (if no P available, we'll use the full list). 
    Map<Item, List<Pattern>> mapPatterns = new HashMap<Item,  List<Pattern>>() ;
   
    // Default behaviour
    double DefaultMatch = NOMATCH ;
   
    long count = -1 ;
   
    public StatsMatcher() {}
   
    public StatsMatcher(String filename)
    {
        try {
            Item stats = SSE.readFile(filename) ;
            if ( stats.isNil() )
            {
                Log.warn(this, "Empty stats file: "+filename) ;
                return ;
            }
            if ( !stats.isTagged(STATS) )
                throw new ARQException("Not a stats file: "+filename) ;
            init(stats) ;
        } catch (ItemException ex)
        {  // Debug
            throw ex ;
        }
    }
   
    public StatsMatcher(Item stats)
    { init(stats) ; }
   
    private void init(Item stats)
    {
        if ( !stats.isTagged(STATS) )
            throw new ARQException("Not a tagged '"+STATS+"'") ;

        ItemList list = stats.getList().cdr();      // Skip tag
       
        if ( list.car().isTagged(META) )
        {       
            // Process the meta tag.
            Item elt1 = list.car();
            list = list.cdr();      // Move list on

            // Get count.
            Item x = Item.find(elt1.getList(), COUNT) ;
            if ( x != null )
                count = x.getList().get(1).asInteger() ;
        }
      
        while (!list.isEmpty())
        {
            Item elt = list.car() ;
            list = list.cdr();
            onePattern(elt) ;
        }
    }
    
    private void onePattern(Item elt)
    {
        Item pat = elt.getList().get(0) ;

        if (pat.isNode())
        {
            // (<uri> weight)
            Node n = pat.getNode() ;
            if (!n.isURI())
            {
                log.warn("Not a preicate URI: " + pat.toString()) ;
                return ;
            }
            addAbbreviation(elt) ;
        }
        else if (pat.isSymbol())
        {
            if ( pat.equals(OTHER) )
            {
                double d = elt.getList().get(1).getDouble() ;
                DefaultMatch = d ;
                return ;
            }
           
            if ( pat.equals(BNODE) || pat.equals(LITERAL) )
            {
                log.warn("Not a match for a predicate URI: " + pat.toString()) ;
                return ;
            }
            if ( pat.equals(TERM) || pat.equals(VAR) || pat.equals(ANY) )
                addAbbreviation(elt) ;
            else
            {
                log.warn("Not understood: " + pat) ;
                return ;
            }
        }
        else if (pat.isList() && pat.getList().size() == 3)
        {
            // It's of the form ((S P O) weight)
            Item w = elt.getList().get(1) ;
            Pattern pattern = new Pattern(((Number)(w.getNode().getLiteralValue())).doubleValue(),
                                          intern(pat.getList().get(0)), intern(pat.getList().get(1)),
                                          intern(pat.getList().get(2))) ;
            addPattern(pattern) ;
        }
        else
        {
            log.warn("Unrecognized pattern: " + pat) ;
        }
    }
   
    private void addAbbreviation(Item elt)
    {
        Item predicateTerm = elt.getList().get(0) ;
        // Single node - it's a predicate abbreviate.
        double numProp = elt.getList().get(1).getDouble() ;
       
        if ( count < 100 )
            addPatternsSmall(predicateTerm, numProp) ;
        else
            addPatterns(predicateTerm, numProp) ;
    }
   
    // Knowing ?PO is quite important - it ranges from IFP (1) to
    // rdf:type rdf:Resource (potentially everything).

    public static final double weightSP = 2 ;
    public static final double weightPO = 10 ;
    public static final double weightTypeO = 1000 ; // ? rdf:type <Object> -- Avoid as can be very, very bad.
   
    public static final double weightSP_small = 2 ;
    public static final double weightPO_small = 4 ;
    public static final double weightTypeO_small = 40 ;
   
    /** Add patterns based solely on the predicate count and some guessing */ 
    public void addPatterns(Node predicate, double numProp)
    {
        addPatterns(Item.createNode(predicate),  numProp) ;
    }
   
    /** Add patterns based solely on the predicate count and some guessing for a small graph
     * (less than a few thousand triples)
     */ 
    public void addPatternsSmall(Node predicate, double numProp)
    {
        addPatternsSmall(Item.createNode(predicate),  numProp) ;
    }
   
    private void addPatterns(Item predicate, double numProp)
    {
        double wSP = weightSP ;
        double wPO = weightPO ;
        wPO = Math.min(numProp, wPO) ;
        wSP = Math.min(numProp, wSP) ;
       
        if ( NodeConst.nodeRDFType.equals(predicate.getNode()) )
            // ? rdf:type <Object> -- Avoid as can be very, very bad.
            wPO = weightTypeO ;
        addPatterns(predicate, numProp, wSP, wPO) ;
    }
   
    private void addPatternsSmall(Item predicate, double numProp)
    {
        double wSP = weightSP_small ;
        double wPO = weightPO_small ;
        wPO = Math.min(numProp, wPO) ;
        wSP = Math.min(numProp, wSP) ;

       
        if ( predicate.isNode() && NodeConst.nodeRDFType.equals(predicate.getNode()) )
            wPO = weightTypeO_small ;
        addPatterns(predicate, numProp, wSP, wPO) ;
    }

    private void addPatterns(Item predicate, double wP, double wSP, double wPO)
    {
        addPattern(new Pattern(wSP, TERM, predicate, ANY)) ;     // S, P, ? : approx weight
        addPattern(new Pattern(wPO,  ANY, predicate, TERM)) ;    // ?, P, O : approx weight
        addPattern(new Pattern(wP,   ANY, predicate, ANY)) ;     // ?, P, ?
    }

    public void addPattern(Pattern pattern)
    {
        // Check for named variables whch should not appear in a Pattern
        check(pattern) ;
       
        patterns.add(pattern) ;
       
        List<Pattern> entry = mapPatterns.get(pattern.predItem) ;
        if ( entry == null )
        {
            entry = new ArrayList<Pattern>() ;
            mapPatterns.put(pattern.predItem, entry ) ;
        }
        entry.add(pattern) ;
    }
   
//    public void addPattern(Triple triple)
//    {
//        if ( triple.getSubject().isVariable() )
//        {
//            // PO, P and O
//        }
//        else
//        {
//            //SPO, SP and SO
//        }
//        throw new NotImplementedException("StatsMatcher.addPattern") ;
//    }
   
    private static void check(Pattern pattern)
    {
        check(pattern.subjItem) ;
        check(pattern.predItem) ;
        check(pattern.objItem) ;
    }

    private static void check(Item item)
    {
        if ( Var.isVar(item.getNode()) )
            throw new ARQException("Explicit variable used in a pattern (use VAR): "+item.getNode()) ;
    }

    private Item intern(Item item)
    {
        if ( item.sameSymbol(ANY.getSymbol()) )         return ANY ;
        if ( item.sameSymbol(VAR.getSymbol()) )         return VAR ;
        if ( item.sameSymbol(TERM.getSymbol()) )        return TERM ;
        if ( item.sameSymbol(URI.getSymbol()) )         return URI ;
        if ( item.sameSymbol(LITERAL.getSymbol()) )     return LITERAL ;
        if ( item.sameSymbol(BNODE.getSymbol()) )       return BNODE ;
        return item ;
    }
   
    public double match(Triple t)
    {
        return match(Item.createNode(t.getSubject()),
                     Item.createNode(t.getPredicate()),
                     Item.createNode(t.getObject())) ;
    }

    public double match(PatternTriple pTriple)
    {
        return match(pTriple.subject, pTriple.predicate, pTriple.object) ;
    }
   
    /** Return the matching weight for the first triple match found,
     * else apply default value for fixed, unknnown predciate,
     * else return NOMATCH
     */
    public double match(Item subj, Item pred, Item obj)
    {
        double m = matchWorker(subj, pred, obj) ;
        if ( m == NOMATCH && pred.isNodeURI() )
            m = DefaultMatch ;
        //System.out.println("("+subj+" "+pred+" "+obj+") => "+m) ;
        return m ;
    }
   
    private double matchWorker(Item subj, Item pred, Item obj)
    {
        if ( isSet(subj) && isSet(pred) && isSet(obj) )
            // A set of triples ...
            return 1.0 ;
       
        // A predicate can be :
        //   A URI      - search on that URI, the TERM and ANY chains.
        //   A variable - search on that VAR and ANY chains.
       
        if ( pred.isNodeURI() )
        {
            double w = NOMATCH ;
            w = search(pred, subj, pred, obj, w) ;
            w = search(TERM, subj, pred, obj, w) //??
            w = search(ANY, subj, pred, obj, w) ;   //??
            return w ;
        }
       
        if ( pred.isVar() )
        {
            double w = NOMATCH ;
            w = search(VAR, subj, pred, obj, w) ;
            w = search(ANY, subj, pred, obj, w) ;
            return w ;
        }
       
        if ( pred.equals(TERM) )
        {
            double w = NOMATCH ;
            w = search(TERM, subj, pred, obj, w) ;
            w = search(ANY, subj, pred, obj, w) ;
            return w ;
        }
       
        if ( pred.equals(ANY) )
        {
            throw new ARQException("Predicate is ANY") ;
           
            //double w = matchLinear(patterns, subj, pred, obj) ;
//            double w = NOMATCH ;
//            w = search(VAR, subj, pred, obj, w) ;
//            w = search(ANY, subj, pred, obj, w) ;
//            return w ;
        }
       
        throw new ARQException("Unidentified predicate: "+pred+" in ("+subj+" "+pred+" "+obj+")") ;
       
        //return matchLinear(subj, pred, obj) ;
    }
   

    private double search(Item key, Item subj, Item pred, Item obj, double oldMin)
    {
        List<Pattern> entry = mapPatterns.get(key) ;
        if ( entry == null )
            return oldMin ;
        double w = matchLinear(entry, subj, pred, obj) ;
        return minPos(w, oldMin) ;
    }
   
    //Minimum respecting NOMATCH for "not known"
    private static double minPos(double x, double y)
    {
        if ( x == NOMATCH ) return y ;
        if ( y == NOMATCH ) return x ;
        return Math.min(x, y) ;
    }
   
    private static double matchLinear(List<Pattern> patterns, Item subj, Item pred, Item obj)
    {
        for ( Pattern pattern : patterns )
        {
            Match match = new Match() ;
            if ( ! matchNode(subj, pattern.subjItem, match) )
                continue ;
            if ( ! matchNode(pred, pattern.predItem, match) )
                continue ;
            if ( ! matchNode(obj, pattern.objItem, match) )
                continue ;
            // First match.
            return pattern.weight ;
        }
        return NOMATCH ;
    }
   
    private static boolean matchNode(Item node, Item item, Match details)
    {
        if ( isAny(item) )
        {
            details.anyMatches ++ ;
            return true ;
        }
       
        if ( isAnyVar(item) )
        {
            details.varMatches ++ ;
            return true ;
        }

        if ( node.isSymbol() )
        {
            //TERM in the thing to be matched means something concrete will be there.
            if ( node.equals(TERM) )
            {
                if ( item.equals(TERM) )
                {
                    details.termMatches ++ ;
                    return true ;
                }
                // Does not match LITERAL, URI, BNODE and VAR/ANY were done above.
                return false ;
            }

            throw new ARQException("StatsMatcher: unexpected slot type: "+node) ;
        }
       
        if ( ! node.isNode() )
            return false ;
      
        Node n = node.getNode() ;
        if n.isConcrete() )
        {
            if ( item.isNode() && item.getNode().equals(n) )
            {
                details.exactMatches ++ ;
                return true ;
            }
       
            if ( isAnyTerm(item) )
            {
                details.termMatches ++ ;
                return true ;
            }
           
            if ( isAnyURI(item) && n.isURI() )
            {
                details.termMatches ++ ;
                return true ;
            }
            if ( isAnyLiteral(item) && n.isLiteral() )
            {
                details.termMatches ++ ;
                return true ;
            }
            if ( isAnyBNode(item) && n.isBlank() )
            {
                details.termMatches ++ ;
                return true ;
            }
        }
        return false ;
    }
   
    @Override
    public String toString()
    {
        String $ = "" ;
        for ( Pattern p : patterns )
            $ = $+p+"\n" ;
        return $ ;
    }
   
    public void printSSE(OutputStream ps)
    {
        IndentedWriter out = new IndentedWriter(ps) ;
        out.println("(stats") ;
        out.incIndent() ;
        for ( Pattern p : patterns )
        {
            p.output(out) ;
            out.println();
        }
        out.decIndent() ;
        out.println(")") ;
        out.flush();
    }
}
TOP

Related Classes of com.hp.hpl.jena.sparql.engine.optimizer.StatsMatcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.